Compare commits

...

4 Commits

Author SHA1 Message Date
AustinMroz
eb02343ff0
Merge ce5afecc36 into ff838657fa 2025-01-09 09:12:29 -05:00
comfyanonymous
ff838657fa Cleaner handling of attention mask in ltxv model code. 2025-01-09 07:12:03 -05:00
Austin Mroz
ce5afecc36
Only use events for devices supporting nonblocking 2024-12-23 16:43:43 -06:00
Austin Mroz
813b8df872
Fix race condition in preview code.
In the previous preview code, when possible, a non-blocking `to`
operation is performed and, immediately after, the output tensor is used
to create an image. If this non-blocking operation has not completed,
PIL makes a copy of the uninitialized memory to produce an image.
Generally, this will contain either zeros or the result of a previously
generated preview. This results in both incorrect output and wasted
computation (unless the memory this output was eventually copied to is
reallocated and displayed instead of a future preview).

To resolve this, the state of the preview generation is tracked with an
event.
- The PIL image is created with no copy
- The preview image is not sent from the server until ready
- Completion of this event is polled at a reasonably low frequency
- A new preview is not created if a previous preview has not completed
2024-12-21 04:13:35 -06:00
3 changed files with 27 additions and 14 deletions

View File

@ -456,9 +456,8 @@ class LTXVModel(torch.nn.Module):
x = self.patchify_proj(x)
timestep = timestep * 1000.0
attention_mask = 1.0 - attention_mask.to(x.dtype).reshape((attention_mask.shape[0], 1, -1, attention_mask.shape[-1]))
attention_mask = attention_mask.masked_fill(attention_mask.to(torch.bool), float("-inf")) # not sure about this
# attention_mask = (context != 0).any(dim=2).to(dtype=x.dtype)
if attention_mask is not None and not torch.is_floating_point(attention_mask):
attention_mask = (attention_mask - 1).to(x.dtype).reshape((attention_mask.shape[0], 1, -1, attention_mask.shape[-1])) * torch.finfo(x.dtype).max
pe = precompute_freqs_cis(indices_grid, dim=self.inner_dim, out_dtype=x.dtype)

View File

@ -9,20 +9,30 @@ import logging
MAX_PREVIEW_RESOLUTION = args.preview_size
def preview_to_image(latent_image):
latents_ubyte = (((latent_image + 1.0) / 2.0).clamp(0, 1) # change scale from -1..1 to 0..1
.mul(0xFF) # to 0..255
).to(device="cpu", dtype=torch.uint8, non_blocking=comfy.model_management.device_supports_non_blocking(latent_image.device))
return Image.fromarray(latents_ubyte.numpy())
class LatentPreviewer:
def decode_latent_to_preview(self, x0):
pass
def decode_latent_to_preview_image(self, preview_format, x0):
preview_image = self.decode_latent_to_preview(x0)
return ("JPEG", preview_image, MAX_PREVIEW_RESOLUTION)
if hasattr(self, 'event') and not self.event.query():
# A previous preview is still being processed
return None
preview_tensor = self.decode_latent_to_preview(x0)
if comfy.model_management.device_supports_non_blocking(preview_tensor.device):
latents_ubyte = (((preview_tensor + 1.0) / 2.0).clamp(0, 1) # change scale from -1..1 to 0..1
.mul(0xFF) # to 0..255
).to(device="cpu", dtype=torch.uint8, non_blocking=True)
latents_rgbx = torch.zeros(latents_ubyte.shape[:2] + (4,), device="cpu", dtype=torch.uint8)
latents_rgbx[:,:,:3] = latents_ubyte
self.event = torch.cuda.Event()
self.event.record()
preview_image = Image.frombuffer('RGBX', (latents_ubyte.shape[1], latents_ubyte.shape[0]),
latents_rgbx.numpy().data, 'raw', 'RGBX', 0, 1)
return ("JPEG", preview_image, MAX_PREVIEW_RESOLUTION, self.event)
latents_ubyte = (((preview_tensor + 1.0) / 2.0).clamp(0, 1) # change scale from -1..1 to 0..1
.mul(0xFF) # to 0..255
).to(device="cpu", dtype=torch.uint8, non_blocking=False)
return ("JPEG", Image.fromarray(latents_ubyte.numpy()), MAX_PREVIEW_RESOLUTION)
class TAESDPreviewerImpl(LatentPreviewer):
def __init__(self, taesd):
@ -30,7 +40,7 @@ class TAESDPreviewerImpl(LatentPreviewer):
def decode_latent_to_preview(self, x0):
x_sample = self.taesd.decode(x0[:1])[0].movedim(0, 2)
return preview_to_image(x_sample)
return x_sample
class Latent2RGBPreviewer(LatentPreviewer):
@ -53,7 +63,7 @@ class Latent2RGBPreviewer(LatentPreviewer):
latent_image = torch.nn.functional.linear(x0.movedim(0, -1), self.latent_rgb_factors, bias=self.latent_rgb_factors_bias)
# latent_image = x0[0].permute(1, 2, 0) @ self.latent_rgb_factors
return preview_to_image(latent_image)
return latent_image
def get_previewer(device, latent_format):

View File

@ -752,6 +752,10 @@ class PromptServer():
image_type = image_data[0]
image = image_data[1]
max_size = image_data[2]
if len(image_data) > 3:
event = image_data[3]
while not event.query():
await asyncio.sleep(.01)
if max_size is not None:
if hasattr(Image, 'Resampling'):
resampling = Image.Resampling.BILINEAR