diff --git a/comfy/latent_formats.py b/comfy/latent_formats.py index a48f60c7..f9fd16d8 100644 --- a/comfy/latent_formats.py +++ b/comfy/latent_formats.py @@ -190,7 +190,21 @@ class Mochi(LatentFormat): 0.9294154431013696, 1.3720942357788521, 0.881393668867029, 0.9168315692124348, 0.9185249279345552, 0.9274757570805041]).view(1, self.latent_channels, 1, 1, 1) - self.latent_rgb_factors = None #TODO + self.latent_rgb_factors =[ + [-0.0069, -0.0045, 0.0018], + [ 0.0154, -0.0692, -0.0274], + [ 0.0333, 0.0019, 0.0206], + [-0.1390, 0.0628, 0.1678], + [-0.0725, 0.0134, -0.1898], + [ 0.0074, -0.0270, -0.0209], + [-0.0176, -0.0277, -0.0221], + [ 0.5294, 0.5204, 0.3852], + [-0.0326, -0.0446, -0.0143], + [-0.0659, 0.0153, -0.0153], + [ 0.0185, -0.0217, 0.0014], + [-0.0396, -0.0495, -0.0281] + ] + self.latent_rgb_factors_bias = [-0.0940, -0.1418, -0.1453] self.taesd_decoder_name = None #TODO def process_in(self, latent): diff --git a/latent_preview.py b/latent_preview.py index ae9211a2..d60e68d5 100644 --- a/latent_preview.py +++ b/latent_preview.py @@ -47,7 +47,12 @@ class Latent2RGBPreviewer(LatentPreviewer): if self.latent_rgb_factors_bias is not None: self.latent_rgb_factors_bias = self.latent_rgb_factors_bias.to(dtype=x0.dtype, device=x0.device) - latent_image = torch.nn.functional.linear(x0[0].permute(1, 2, 0), self.latent_rgb_factors, bias=self.latent_rgb_factors_bias) + if x0.ndim == 5: + x0 = x0[0, :, 0] + else: + x0 = x0[0] + + latent_image = torch.nn.functional.linear(x0.movedim(0, -1), self.latent_rgb_factors, bias=self.latent_rgb_factors_bias) # latent_image = x0[0].permute(1, 2, 0) @ self.latent_rgb_factors return preview_to_image(latent_image)