Adjust used dtypes for hunyuan video VAE and diffusion model.

comfyanonymous 2024-12-16 23:31:10 -05:00
parent 0b25f47bd9
commit 39b1fc4ccc
2 changed files with 2 additions and 1 deletion


@@ -323,6 +323,7 @@ class VAE:
             self.first_stage_model = AutoencoderKL(ddconfig=ddconfig, embed_dim=sd['post_quant_conv.weight'].shape[1])
             self.memory_used_decode = lambda shape, dtype: (1500 * shape[2] * shape[3] * shape[4] * (4 * 8 * 8)) * model_management.dtype_size(dtype)
             self.memory_used_encode = lambda shape, dtype: (900 * max(shape[2], 2) * shape[3] * shape[4]) * model_management.dtype_size(dtype)
+            self.working_dtypes = [torch.bfloat16, torch.float16, torch.float32]
         elif "decoder.layers.1.layers.0.beta" in sd:
             self.first_stage_model = AudioOobleckVAE()
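The added working_dtypes list gives the Hunyuan video VAE a preference-ordered set of precisions it can run in. As a minimal sketch (not ComfyUI's actual model_management code; pick_working_dtype and device_supports are hypothetical names), resolving such a list typically means taking the first entry the device can actually execute:

import torch

def pick_working_dtype(working_dtypes, device):
    # Hypothetical helper: return the first dtype in the preference
    # list that the target device can execute.
    def device_supports(dtype):
        if dtype == torch.bfloat16:
            # bf16 runs on CPU and on GPUs that report bf16 support.
            return device.type == "cpu" or torch.cuda.is_bf16_supported()
        if dtype == torch.float16:
            # Assume fp16 is only usable on accelerators, not CPU.
            return device.type != "cpu"
        return True  # fp32 works everywhere

    for dtype in working_dtypes:
        if device_supports(dtype):
            return dtype
    return torch.float32  # safe fallback

# With the list added in this commit, bf16 is tried first, then fp16, then fp32:
print(pick_working_dtype([torch.bfloat16, torch.float16, torch.float32],
                         torch.device("cpu")))  # -> torch.bfloat16

Note that the memory_used_decode and memory_used_encode estimates above scale linearly with model_management.dtype_size(dtype), so running the VAE in bf16 or fp16 (2 bytes per element) halves the estimated memory relative to fp32 (4 bytes).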


@@ -753,7 +753,7 @@ class HunyuanVideo(supported_models_base.BASE):
     memory_usage_factor = 2.0 #TODO
-    supported_inference_dtypes = [torch.bfloat16, torch.float16, torch.float32]
+    supported_inference_dtypes = [torch.bfloat16, torch.float32]
     vae_key_prefix = ["vae."]
     text_encoder_key_prefix = ["text_encoders."]
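Dropping torch.float16 from supported_inference_dtypes means the Hunyuan video diffusion model is no longer offered fp16 as an inference precision; on hardware without bf16 support it falls back to fp32. A rough sketch of that effect, assuming the first dtype supported by both the model and the device wins (pick_inference_dtype and the device sets are assumptions for illustration, not ComfyUI's real API):

import torch

def pick_inference_dtype(supported_inference_dtypes, device_dtypes):
    # Hypothetical: walk the model's preference list and return the
    # first dtype the device can execute.
    for dtype in supported_inference_dtypes:
        if dtype in device_dtypes:
            return dtype
    return torch.float32  # safe fallback

old = [torch.bfloat16, torch.float16, torch.float32]
new = [torch.bfloat16, torch.float32]
fp16_only_gpu = {torch.float16, torch.float32}  # GPU without bf16 support

print(pick_inference_dtype(old, fp16_only_gpu))  # torch.float16 (before this commit)
print(pick_inference_dtype(new, fp16_only_gpu))  # torch.float32 (after this commit)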