From 50614f1b7933244c01d85880c41b50bbd0c4de8b Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 17 Mar 2025 13:56:11 -0400 Subject: [PATCH] Fix regression with clip vision. --- comfy/clip_vision.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/comfy/clip_vision.py b/comfy/clip_vision.py index 25baf5ca..87d32a66 100644 --- a/comfy/clip_vision.py +++ b/comfy/clip_vision.py @@ -36,7 +36,8 @@ def clip_preprocess(image, size=224, mean=[0.48145466, 0.4578275, 0.40821073], s return (image - mean.view([3,1,1])) / std.view([3,1,1]) IMAGE_ENCODERS = { - "clip_vision": comfy.clip_model.CLIPVisionModelProjection, + "clip_vision_model": comfy.clip_model.CLIPVisionModelProjection, + "siglip_vision_model": comfy.clip_model.CLIPVisionModelProjection, "dinov2": comfy.image_encoders.dino2.Dinov2Model, } @@ -48,7 +49,7 @@ class ClipVisionModel(): self.image_size = config.get("image_size", 224) self.image_mean = config.get("image_mean", [0.48145466, 0.4578275, 0.40821073]) self.image_std = config.get("image_std", [0.26862954, 0.26130258, 0.27577711]) - model_class = IMAGE_ENCODERS.get(config.get("model_type", "clip_vision")) + model_class = IMAGE_ENCODERS.get(config.get("model_type", "clip_vision_model")) self.load_device = comfy.model_management.text_encoder_device() offload_device = comfy.model_management.text_encoder_offload_device() self.dtype = comfy.model_management.text_encoder_dtype(self.load_device)