From 47acb3d73e7df2a474598278ffe5a666ee4686cf Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Sun, 5 Mar 2023 18:39:25 -0500 Subject: [PATCH 1/8] Implement support for t2i style model. It needs the CLIPVision model so I added CLIPVisionLoader and CLIPVisionEncode. Put the clip vision model in models/clip_vision Put the t2i style model in models/style_models StyleModelLoader to load it, StyleModelApply to apply it ConditioningAppend to append the conditioning it outputs to a positive one. --- comfy/sd.py | 26 ++++-- comfy_extras/clip_vision.py | 32 +++++++ .../clip_vision/put_clip_vision_models_here | 0 models/style_models/put_t2i_style_model_here | 0 nodes.py | 90 +++++++++++++++++++ 5 files changed, 143 insertions(+), 5 deletions(-) create mode 100644 comfy_extras/clip_vision.py create mode 100644 models/clip_vision/put_clip_vision_models_here create mode 100644 models/style_models/put_t2i_style_model_here diff --git a/comfy/sd.py b/comfy/sd.py index 19722113..eb4ea793 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -613,11 +613,7 @@ class T2IAdapter: def load_t2i_adapter(ckpt_path, model=None): t2i_data = load_torch_file(ckpt_path) keys = t2i_data.keys() - if "style_embedding" in keys: - pass - # TODO - # model_ad = adapter.StyleAdapter(width=1024, context_dim=768, num_head=8, n_layes=3, num_token=8) - elif "body.0.in_conv.weight" in keys: + if "body.0.in_conv.weight" in keys: cin = t2i_data['body.0.in_conv.weight'].shape[1] model_ad = adapter.Adapter_light(cin=cin, channels=[320, 640, 1280, 1280], nums_rb=4) else: @@ -626,6 +622,26 @@ def load_t2i_adapter(ckpt_path, model=None): model_ad.load_state_dict(t2i_data) return T2IAdapter(model_ad, cin // 64) + +class StyleModel: + def __init__(self, model, device="cpu"): + self.model = model + + def get_cond(self, input): + return self.model(input.last_hidden_state) + + +def load_style_model(ckpt_path): + model_data = load_torch_file(ckpt_path) + keys = model_data.keys() + if "style_embedding" in keys: + model = adapter.StyleAdapter(width=1024, context_dim=768, num_head=8, n_layes=3, num_token=8) + else: + raise Exception("invalid style model {}".format(ckpt_path)) + model.load_state_dict(model_data) + return StyleModel(model) + + def load_clip(ckpt_path, embedding_directory=None): clip_data = load_torch_file(ckpt_path) config = {} diff --git a/comfy_extras/clip_vision.py b/comfy_extras/clip_vision.py new file mode 100644 index 00000000..58d79a83 --- /dev/null +++ b/comfy_extras/clip_vision.py @@ -0,0 +1,32 @@ +from transformers import CLIPVisionModel, CLIPVisionConfig, CLIPImageProcessor +from comfy.sd import load_torch_file +import os + +class ClipVisionModel(): + def __init__(self): + json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config.json") + config = CLIPVisionConfig.from_json_file(json_config) + self.model = CLIPVisionModel(config) + self.processor = CLIPImageProcessor(crop_size=224, + do_center_crop=True, + do_convert_rgb=True, + do_normalize=True, + do_resize=True, + image_mean=[ 0.48145466,0.4578275,0.40821073], + image_std=[0.26862954,0.26130258,0.27577711], + resample=3, #bicubic + size=224) + + def load_sd(self, sd): + self.model.load_state_dict(sd, strict=False) + + def encode_image(self, image): + inputs = self.processor(images=[image[0]], return_tensors="pt") + outputs = self.model(**inputs) + return outputs + +def load(ckpt_path): + clip_data = load_torch_file(ckpt_path) + clip = ClipVisionModel() + clip.load_sd(clip_data) + return clip diff --git 
a/models/clip_vision/put_clip_vision_models_here b/models/clip_vision/put_clip_vision_models_here new file mode 100644 index 00000000..e69de29b diff --git a/models/style_models/put_t2i_style_model_here b/models/style_models/put_t2i_style_model_here new file mode 100644 index 00000000..e69de29b diff --git a/nodes.py b/nodes.py index 26dad572..4cbfe755 100644 --- a/nodes.py +++ b/nodes.py @@ -18,6 +18,8 @@ import comfy.samplers import comfy.sd import comfy.utils +import comfy_extras.clip_vision + import model_management import importlib @@ -370,6 +372,89 @@ class CLIPLoader: clip = comfy.sd.load_clip(ckpt_path=clip_path, embedding_directory=CheckpointLoader.embedding_directory) return (clip,) +class CLIPVisionLoader: + models_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "models") + clip_dir = os.path.join(models_dir, "clip_vision") + @classmethod + def INPUT_TYPES(s): + return {"required": { "clip_name": (filter_files_extensions(recursive_search(s.clip_dir), supported_pt_extensions), ), + }} + RETURN_TYPES = ("CLIP_VISION",) + FUNCTION = "load_clip" + + CATEGORY = "loaders" + + def load_clip(self, clip_name): + clip_path = os.path.join(self.clip_dir, clip_name) + clip_vision = comfy_extras.clip_vision.load(clip_path) + return (clip_vision,) + +class CLIPVisionEncode: + @classmethod + def INPUT_TYPES(s): + return {"required": { "clip_vision": ("CLIP_VISION",), + "image": ("IMAGE",) + }} + RETURN_TYPES = ("CLIP_VISION_EMBED",) + FUNCTION = "encode" + + CATEGORY = "conditioning" + + def encode(self, clip_vision, image): + output = clip_vision.encode_image(image) + return (output,) + +class StyleModelLoader: + models_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "models") + style_model_dir = os.path.join(models_dir, "style_models") + @classmethod + def INPUT_TYPES(s): + return {"required": { "style_model_name": (filter_files_extensions(recursive_search(s.style_model_dir), supported_pt_extensions), )}} + + RETURN_TYPES = ("STYLE_MODEL",) + FUNCTION = "load_style_model" + + CATEGORY = "loaders" + + def load_style_model(self, style_model_name): + style_model_path = os.path.join(self.style_model_dir, style_model_name) + style_model = comfy.sd.load_style_model(style_model_path) + return (style_model,) + + +class StyleModelApply: + @classmethod + def INPUT_TYPES(s): + return {"required": {"clip_vision_embed": ("CLIP_VISION_EMBED", ), + "style_model": ("STYLE_MODEL", ) + }} + RETURN_TYPES = ("CONDITIONING",) + FUNCTION = "apply_stylemodel" + + CATEGORY = "conditioning" + + def apply_stylemodel(self, clip_vision_embed, style_model): + c = style_model.get_cond(clip_vision_embed) + return ([[c, {}]], ) + + +class ConditioningAppend: + @classmethod + def INPUT_TYPES(s): + return {"required": {"conditioning_to": ("CONDITIONING", ), "conditioning_from": ("CONDITIONING", )}} + RETURN_TYPES = ("CONDITIONING",) + FUNCTION = "append" + + CATEGORY = "conditioning" + + def append(self, conditioning_to, conditioning_from): + c = [] + to_append = conditioning_from[0][0] + for t in conditioning_to: + n = [torch.cat((t[0],to_append), dim=1), t[1].copy()] + c.append(n) + return (c, ) + class EmptyLatentImage: def __init__(self, device="cpu"): self.device = device @@ -866,6 +951,11 @@ NODE_CLASS_MAPPINGS = { "LatentCrop": LatentCrop, "LoraLoader": LoraLoader, "CLIPLoader": CLIPLoader, + "StyleModelLoader": StyleModelLoader, + "CLIPVisionLoader": CLIPVisionLoader, + "CLIPVisionEncode": CLIPVisionEncode, + "StyleModelApply":StyleModelApply, + "ConditioningAppend":ConditioningAppend, 
"ControlNetApply": ControlNetApply, "ControlNetLoader": ControlNetLoader, "DiffControlNetLoader": DiffControlNetLoader, From 7ec1dd25a2583e68049cc08525f0099d7e0ecb3b Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 6 Mar 2023 01:30:17 -0500 Subject: [PATCH 2/8] A tiny bit of reorganizing. --- comfy_extras/clip_vision_config.json | 23 +++++++++++++++++++++++ nodes.py | 24 ++++++++++++------------ 2 files changed, 35 insertions(+), 12 deletions(-) create mode 100644 comfy_extras/clip_vision_config.json diff --git a/comfy_extras/clip_vision_config.json b/comfy_extras/clip_vision_config.json new file mode 100644 index 00000000..0e4db13d --- /dev/null +++ b/comfy_extras/clip_vision_config.json @@ -0,0 +1,23 @@ +{ + "_name_or_path": "openai/clip-vit-large-patch14", + "architectures": [ + "CLIPVisionModel" + ], + "attention_dropout": 0.0, + "dropout": 0.0, + "hidden_act": "quick_gelu", + "hidden_size": 1024, + "image_size": 224, + "initializer_factor": 1.0, + "initializer_range": 0.02, + "intermediate_size": 4096, + "layer_norm_eps": 1e-05, + "model_type": "clip_vision_model", + "num_attention_heads": 16, + "num_channels": 3, + "num_hidden_layers": 24, + "patch_size": 14, + "projection_dim": 768, + "torch_dtype": "float32", + "transformers_version": "4.24.0" +} diff --git a/nodes.py b/nodes.py index 4cbfe755..112c9ea8 100644 --- a/nodes.py +++ b/nodes.py @@ -395,10 +395,10 @@ class CLIPVisionEncode: return {"required": { "clip_vision": ("CLIP_VISION",), "image": ("IMAGE",) }} - RETURN_TYPES = ("CLIP_VISION_EMBED",) + RETURN_TYPES = ("CLIP_VISION_OUTPUT",) FUNCTION = "encode" - CATEGORY = "conditioning" + CATEGORY = "conditioning/style_model" def encode(self, clip_vision, image): output = clip_vision.encode_image(image) @@ -425,16 +425,16 @@ class StyleModelLoader: class StyleModelApply: @classmethod def INPUT_TYPES(s): - return {"required": {"clip_vision_embed": ("CLIP_VISION_EMBED", ), + return {"required": {"clip_vision_output": ("CLIP_VISION_OUTPUT", ), "style_model": ("STYLE_MODEL", ) }} RETURN_TYPES = ("CONDITIONING",) FUNCTION = "apply_stylemodel" - CATEGORY = "conditioning" + CATEGORY = "conditioning/style_model" - def apply_stylemodel(self, clip_vision_embed, style_model): - c = style_model.get_cond(clip_vision_embed) + def apply_stylemodel(self, clip_vision_output, style_model): + c = style_model.get_cond(clip_vision_output) return ([[c, {}]], ) @@ -445,7 +445,7 @@ class ConditioningAppend: RETURN_TYPES = ("CONDITIONING",) FUNCTION = "append" - CATEGORY = "conditioning" + CATEGORY = "conditioning/style_model" def append(self, conditioning_to, conditioning_from): c = [] @@ -504,7 +504,7 @@ class LatentRotate: RETURN_TYPES = ("LATENT",) FUNCTION = "rotate" - CATEGORY = "latent" + CATEGORY = "latent/transform" def rotate(self, samples, rotation): s = samples.copy() @@ -528,7 +528,7 @@ class LatentFlip: RETURN_TYPES = ("LATENT",) FUNCTION = "flip" - CATEGORY = "latent" + CATEGORY = "latent/transform" def flip(self, samples, flip_method): s = samples.copy() @@ -593,7 +593,7 @@ class LatentCrop: RETURN_TYPES = ("LATENT",) FUNCTION = "crop" - CATEGORY = "latent" + CATEGORY = "latent/transform" def crop(self, samples, width, height, x, y): s = samples.copy() @@ -951,8 +951,6 @@ NODE_CLASS_MAPPINGS = { "LatentCrop": LatentCrop, "LoraLoader": LoraLoader, "CLIPLoader": CLIPLoader, - "StyleModelLoader": StyleModelLoader, - "CLIPVisionLoader": CLIPVisionLoader, "CLIPVisionEncode": CLIPVisionEncode, "StyleModelApply":StyleModelApply, "ConditioningAppend":ConditioningAppend, @@ -960,6 +958,8 @@ 
NODE_CLASS_MAPPINGS = { "ControlNetLoader": ControlNetLoader, "DiffControlNetLoader": DiffControlNetLoader, "T2IAdapterLoader": T2IAdapterLoader, + "StyleModelLoader": StyleModelLoader, + "CLIPVisionLoader": CLIPVisionLoader, "VAEDecodeTiled": VAEDecodeTiled, } From 8515d963286550db1d7189fec229629acde5d6d3 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 6 Mar 2023 01:48:18 -0500 Subject: [PATCH 3/8] It's more logical for the StyleModelApply to append it directly. --- nodes.py | 30 ++++++++---------------------- 1 file changed, 8 insertions(+), 22 deletions(-) diff --git a/nodes.py b/nodes.py index 112c9ea8..84510a05 100644 --- a/nodes.py +++ b/nodes.py @@ -425,33 +425,20 @@ class StyleModelLoader: class StyleModelApply: @classmethod def INPUT_TYPES(s): - return {"required": {"clip_vision_output": ("CLIP_VISION_OUTPUT", ), - "style_model": ("STYLE_MODEL", ) + return {"required": {"conditioning": ("CONDITIONING", ), + "style_model": ("STYLE_MODEL", ), + "clip_vision_output": ("CLIP_VISION_OUTPUT", ), }} RETURN_TYPES = ("CONDITIONING",) FUNCTION = "apply_stylemodel" CATEGORY = "conditioning/style_model" - def apply_stylemodel(self, clip_vision_output, style_model): - c = style_model.get_cond(clip_vision_output) - return ([[c, {}]], ) - - -class ConditioningAppend: - @classmethod - def INPUT_TYPES(s): - return {"required": {"conditioning_to": ("CONDITIONING", ), "conditioning_from": ("CONDITIONING", )}} - RETURN_TYPES = ("CONDITIONING",) - FUNCTION = "append" - - CATEGORY = "conditioning/style_model" - - def append(self, conditioning_to, conditioning_from): + def apply_stylemodel(self, clip_vision_output, style_model, conditioning): + cond = style_model.get_cond(clip_vision_output) c = [] - to_append = conditioning_from[0][0] - for t in conditioning_to: - n = [torch.cat((t[0],to_append), dim=1), t[1].copy()] + for t in conditioning: + n = [torch.cat((t[0], cond), dim=1), t[1].copy()] c.append(n) return (c, ) @@ -952,8 +939,7 @@ NODE_CLASS_MAPPINGS = { "LoraLoader": LoraLoader, "CLIPLoader": CLIPLoader, "CLIPVisionEncode": CLIPVisionEncode, - "StyleModelApply":StyleModelApply, - "ConditioningAppend":ConditioningAppend, + "StyleModelApply": StyleModelApply, "ControlNetApply": ControlNetApply, "ControlNetLoader": ControlNetLoader, "DiffControlNetLoader": DiffControlNetLoader, From 799f510d0cced79d39b827f17d79f3a03e19b126 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 6 Mar 2023 02:07:58 -0500 Subject: [PATCH 4/8] Add some links to notebook for the t2i styles model. 
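With those two files downloaded, the pieces from the first three patches chain together roughly as sketched below. This is a minimal sketch, not a definitive workflow: the file names are taken from the notebook cell added in this patch, and image / conditioning are placeholders for a loaded IMAGE tensor and an existing text conditioning list.

    import torch
    import comfy.sd
    import comfy_extras.clip_vision

    # CLIPVisionLoader / StyleModelLoader: load the CLIP vision tower and the
    # T2I style adapter from the directories introduced in patch 1.
    clip_vision = comfy_extras.clip_vision.load("models/clip_vision/clip_vit14.bin")
    style_model = comfy.sd.load_style_model("models/style_models/t2iadapter_style_sd14v1.pth")

    # CLIPVisionEncode: run the reference image through the vision model.
    clip_vision_output = clip_vision.encode_image(image)

    # StyleModelApply (as of patch 3): turn the CLIP vision output into style
    # tokens and concatenate them onto every text conditioning entry.
    cond = style_model.get_cond(clip_vision_output)
    conditioning = [[torch.cat((t[0], cond), dim=1), t[1].copy()] for t in conditioning]
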
--- notebooks/comfyui_colab.ipynb | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/notebooks/comfyui_colab.ipynb b/notebooks/comfyui_colab.ipynb index 5315ab08..06278b27 100644 --- a/notebooks/comfyui_colab.ipynb +++ b/notebooks/comfyui_colab.ipynb @@ -89,6 +89,11 @@ "#!wget -c https://huggingface.co/TencentARC/T2I-Adapter/resolve/main/models/t2iadapter_color_sd14v1.pth -P ./models/t2i_adapter/\n", "#!wget -c https://huggingface.co/TencentARC/T2I-Adapter/resolve/main/models/t2iadapter_canny_sd14v1.pth -P ./models/t2i_adapter/\n", "\n", + "# T2I Styles Model\n", + "#!wget -c https://huggingface.co/TencentARC/T2I-Adapter/resolve/main/models/t2iadapter_style_sd14v1.pth -P ./models/style_models/\n", + "\n", + "# CLIPVision model (needed for styles model)\n", + "#!wget -c https://huggingface.co/openai/clip-vit-large-patch14/resolve/main/pytorch_model.bin -O ./models/clip_vision/clip_vit14.bin\n", "\n", "\n", "# ControlNet\n", From afff30fc0a4d11be4823ccce78d281a4e504c914 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 6 Mar 2023 10:50:50 -0500 Subject: [PATCH 5/8] Add --cpu to use the cpu for inference. --- comfy/model_management.py | 16 +++++++++++++++- comfy/samplers.py | 2 +- comfy/sd.py | 16 +++++++++++----- main.py | 1 + nodes.py | 22 +++++++--------------- 5 files changed, 35 insertions(+), 22 deletions(-) diff --git a/comfy/model_management.py b/comfy/model_management.py index 32159b82..4b061c32 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -31,6 +31,8 @@ try: except: pass +if "--cpu" in sys.argv: + vram_state = CPU if "--lowvram" in sys.argv: set_vram_to = LOW_VRAM if "--novram" in sys.argv: @@ -118,6 +120,8 @@ def load_model_gpu(model): def load_controlnet_gpu(models): global current_gpu_controlnets global vram_state + if vram_state == CPU: + return if vram_state == LOW_VRAM or vram_state == NO_VRAM: #don't load controlnets like this if low vram because they will be loaded right before running and unloaded right after @@ -144,10 +148,20 @@ def unload_if_low_vram(model): return model.cpu() return model +def get_torch_device(): + if vram_state == CPU: + return torch.device("cpu") + else: + return torch.cuda.current_device() + +def get_autocast_device(dev): + if hasattr(dev, 'type'): + return dev.type + return "cuda" def get_free_memory(dev=None, torch_free_too=False): if dev is None: - dev = torch.cuda.current_device() + dev = get_torch_device() if hasattr(dev, 'type') and dev.type == 'cpu': mem_free_total = psutil.virtual_memory().available diff --git a/comfy/samplers.py b/comfy/samplers.py index 3562f89d..569c32f4 100644 --- a/comfy/samplers.py +++ b/comfy/samplers.py @@ -438,7 +438,7 @@ class KSampler: else: max_denoise = True - with precision_scope(self.device): + with precision_scope(model_management.get_autocast_device(self.device)): if self.sampler == "uni_pc": samples = uni_pc.sample_unipc(self.model_wrap, noise, latent_image, sigmas, sampling_function=sampling_function, max_denoise=max_denoise, extra_args=extra_args, noise_mask=denoise_mask) elif self.sampler == "uni_pc_bh2": diff --git a/comfy/sd.py b/comfy/sd.py index eb4ea793..67a207cb 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -299,7 +299,7 @@ class CLIP: return cond class VAE: - def __init__(self, ckpt_path=None, scale_factor=0.18215, device="cuda", config=None): + def __init__(self, ckpt_path=None, scale_factor=0.18215, device=None, config=None): if config is None: #default SD1.x/SD2.x VAE parameters ddconfig = {'double_z': True, 'z_channels': 4, 'resolution': 256, 
'in_channels': 3, 'out_ch': 3, 'ch': 128, 'ch_mult': [1, 2, 4, 4], 'num_res_blocks': 2, 'attn_resolutions': [], 'dropout': 0.0} @@ -308,6 +308,8 @@ class VAE: self.first_stage_model = AutoencoderKL(**(config['params']), ckpt_path=ckpt_path) self.first_stage_model = self.first_stage_model.eval() self.scale_factor = scale_factor + if device is None: + device = model_management.get_torch_device() self.device = device def decode(self, samples): @@ -381,11 +383,13 @@ def resize_image_to(tensor, target_latent_tensor, batched_number): return torch.cat([tensor] * batched_number, dim=0) class ControlNet: - def __init__(self, control_model, device="cuda"): + def __init__(self, control_model, device=None): self.control_model = control_model self.cond_hint_original = None self.cond_hint = None self.strength = 1.0 + if device is None: + device = model_management.get_torch_device() self.device = device self.previous_controlnet = None @@ -406,7 +410,7 @@ class ControlNet: else: precision_scope = contextlib.nullcontext - with precision_scope(self.device): + with precision_scope(model_management.get_autocast_device(self.device)): self.control_model = model_management.load_if_low_vram(self.control_model) control = self.control_model(x=x_noisy, hint=self.cond_hint, timesteps=t, context=cond_txt) self.control_model = model_management.unload_if_low_vram(self.control_model) @@ -481,7 +485,7 @@ def load_controlnet(ckpt_path, model=None): context_dim = controlnet_data[key].shape[1] use_fp16 = False - if controlnet_data[key].dtype == torch.float16: + if model_management.should_use_fp16() and controlnet_data[key].dtype == torch.float16: use_fp16 = True control_model = cldm.ControlNet(image_size=32, @@ -527,10 +531,12 @@ def load_controlnet(ckpt_path, model=None): return control class T2IAdapter: - def __init__(self, t2i_model, channels_in, device="cuda"): + def __init__(self, t2i_model, channels_in, device=None): self.t2i_model = t2i_model self.channels_in = channels_in self.strength = 1.0 + if device is None: + device = model_management.get_torch_device() self.device = device self.previous_controlnet = None self.control_input = None diff --git a/main.py b/main.py index 43dff955..ca8674b5 100644 --- a/main.py +++ b/main.py @@ -24,6 +24,7 @@ if __name__ == "__main__": print("\t--lowvram\t\t\tSplit the unet in parts to use less vram.") print("\t--novram\t\t\tWhen lowvram isn't enough.") print() + print("\t--cpu\t\t\tTo use the CPU for everything (slow).") exit() if '--dont-upcast-attention' in sys.argv: diff --git a/nodes.py b/nodes.py index 84510a05..e5800d0d 100644 --- a/nodes.py +++ b/nodes.py @@ -628,9 +628,10 @@ class SetLatentNoiseMask: return (s,) -def common_ksampler(device, model, seed, steps, cfg, sampler_name, scheduler, positive, negative, latent, denoise=1.0, disable_noise=False, start_step=None, last_step=None, force_full_denoise=False): +def common_ksampler(model, seed, steps, cfg, sampler_name, scheduler, positive, negative, latent, denoise=1.0, disable_noise=False, start_step=None, last_step=None, force_full_denoise=False): latent_image = latent["samples"] noise_mask = None + device = model_management.get_torch_device() if disable_noise: noise = torch.zeros(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, device="cpu") @@ -646,12 +647,9 @@ def common_ksampler(device, model, seed, steps, cfg, sampler_name, scheduler, po noise_mask = noise_mask.to(device) real_model = None - if device != "cpu": - model_management.load_model_gpu(model) - real_model = model.model - else: - 
#TODO: cpu support - real_model = model.patch_model() + model_management.load_model_gpu(model) + real_model = model.model + noise = noise.to(device) latent_image = latent_image.to(device) @@ -697,9 +695,6 @@ def common_ksampler(device, model, seed, steps, cfg, sampler_name, scheduler, po return (out, ) class KSampler: - def __init__(self, device="cuda"): - self.device = device - @classmethod def INPUT_TYPES(s): return {"required": @@ -721,12 +716,9 @@ class KSampler: CATEGORY = "sampling" def sample(self, model, seed, steps, cfg, sampler_name, scheduler, positive, negative, latent_image, denoise=1.0): - return common_ksampler(self.device, model, seed, steps, cfg, sampler_name, scheduler, positive, negative, latent_image, denoise=denoise) + return common_ksampler(model, seed, steps, cfg, sampler_name, scheduler, positive, negative, latent_image, denoise=denoise) class KSamplerAdvanced: - def __init__(self, device="cuda"): - self.device = device - @classmethod def INPUT_TYPES(s): return {"required": @@ -757,7 +749,7 @@ class KSamplerAdvanced: disable_noise = False if add_noise == "disable": disable_noise = True - return common_ksampler(self.device, model, noise_seed, steps, cfg, sampler_name, scheduler, positive, negative, latent_image, denoise=denoise, disable_noise=disable_noise, start_step=start_at_step, last_step=end_at_step, force_full_denoise=force_full_denoise) + return common_ksampler(model, noise_seed, steps, cfg, sampler_name, scheduler, positive, negative, latent_image, denoise=denoise, disable_noise=disable_noise, start_step=start_at_step, last_step=end_at_step, force_full_denoise=force_full_denoise) class SaveImage: def __init__(self): From 501f19eec65aab44e425172b584e52409a096999 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 6 Mar 2023 11:34:02 -0500 Subject: [PATCH 6/8] Fix clip_skip no longer being loaded from yaml file. 
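The change makes the override opt-in: a freshly constructed CLIP leaves layer_idx at None, so a clip_skip value coming from the checkpoint's yaml config is no longer clobbered by the old -1 default, while an explicit clip_layer() call still takes effect. A minimal sketch of the resulting behaviour, assuming clip is a loaded comfy.sd.CLIP object:

    # Default path: encode() no longer forces a layer index, so a clip_skip
    # set by the model's yaml config is respected.
    cond = clip.encode("a photo of a cat")

    # An explicit override behaves as before.
    clip.clip_layer(-2)
    cond_skipped = clip.encode("a photo of a cat")
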
--- comfy/sd.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/comfy/sd.py b/comfy/sd.py index 67a207cb..e19b2a35 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -266,7 +266,7 @@ class CLIP: self.cond_stage_model = clip(**(params)) self.tokenizer = tokenizer(embedding_directory=embedding_directory) self.patcher = ModelPatcher(self.cond_stage_model) - self.layer_idx = -1 + self.layer_idx = None def clone(self): n = CLIP(no_init=True) @@ -287,7 +287,8 @@ class CLIP: self.layer_idx = layer_idx def encode(self, text): - self.cond_stage_model.clip_layer(self.layer_idx) + if self.layer_idx is not None: + self.cond_stage_model.clip_layer(self.layer_idx) tokens = self.tokenizer.tokenize_with_weights(text) try: self.patcher.patch_model() From 165be5828a1615aec1b5ef10924d469158d29fd9 Mon Sep 17 00:00:00 2001 From: edikius <44285594+edikius@users.noreply.github.com> Date: Mon, 6 Mar 2023 17:41:40 +0100 Subject: [PATCH 7/8] Fixed import (#44) * fixed import error I had an ImportError: cannot import name 'Protocol' from 'typing' while trying to update so I fixed it to start an app * Update main.py * deleted example files --- comfy/ldm/modules/sub_quadratic_attention.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/comfy/ldm/modules/sub_quadratic_attention.py b/comfy/ldm/modules/sub_quadratic_attention.py index 5abed48c..edbff74a 100644 --- a/comfy/ldm/modules/sub_quadratic_attention.py +++ b/comfy/ldm/modules/sub_quadratic_attention.py @@ -14,7 +14,12 @@ import torch from torch import Tensor from torch.utils.checkpoint import checkpoint import math -from typing import Optional, NamedTuple, Protocol, List + +try: + from typing import Optional, NamedTuple, List, Protocol +except ImportError: + from typing import Optional, NamedTuple, List + from typing_extensions import Protocol from torch import Tensor from typing import List From 25941aeef9cc17cb26f66b724a6624505ab68eaa Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 6 Mar 2023 12:02:02 -0500 Subject: [PATCH 8/8] This isn't needed anymore. --- notebooks/comfyui_colab.ipynb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/notebooks/comfyui_colab.ipynb b/notebooks/comfyui_colab.ipynb index 06278b27..7664cc03 100644 --- a/notebooks/comfyui_colab.ipynb +++ b/notebooks/comfyui_colab.ipynb @@ -35,8 +35,7 @@ "source": [ "!git clone https://github.com/comfyanonymous/ComfyUI\n", "%cd ComfyUI\n", - "!pip install xformers -r requirements.txt\n", - "!sed -i 's/v1-inference.yaml/v1-inference_fp16.yaml/g' webshit/index.html" + "!pip install xformers -r requirements.txt" ] }, {
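
The device helpers added in patch 5 are what let the sampler and VAE code above stop hard-coding "cuda". A minimal usage sketch, assuming the comfy directory is on sys.path as it is for nodes.py; the noise shape is arbitrary:

    import torch
    import model_management

    # torch.device("cpu") when the server was started with --cpu,
    # otherwise the current CUDA device.
    device = model_management.get_torch_device()

    noise = torch.randn((1, 4, 64, 64), device="cpu").to(device)

    # get_autocast_device() maps whatever get_torch_device() returned to the
    # device-type string torch.autocast expects, mirroring its use in samplers.py.
    with torch.autocast(model_management.get_autocast_device(device)):
        pass  # run the diffusion model here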