Merge remote-tracking branch 'origin/master' into frontendrefactor

pythongosssss 2023-03-06 17:02:34 +00:00
commit 09db31309a
11 changed files with 205 additions and 35 deletions


@@ -14,7 +14,12 @@ import torch
from torch import Tensor
from torch.utils.checkpoint import checkpoint
import math
-from typing import Optional, NamedTuple, Protocol, List
+try:
+    from typing import Optional, NamedTuple, List, Protocol
+except ImportError:
+    from typing import Optional, NamedTuple, List
+    from typing_extensions import Protocol
from torch import Tensor
from typing import List


@@ -31,6 +31,8 @@ try:
except:
    pass

+if "--cpu" in sys.argv:
+    vram_state = CPU

if "--lowvram" in sys.argv:
    set_vram_to = LOW_VRAM
if "--novram" in sys.argv:

@@ -118,6 +120,8 @@ def load_model_gpu(model):
def load_controlnet_gpu(models):
    global current_gpu_controlnets
    global vram_state
+    if vram_state == CPU:
+        return
    if vram_state == LOW_VRAM or vram_state == NO_VRAM:
        #don't load controlnets like this if low vram because they will be loaded right before running and unloaded right after

@@ -144,10 +148,20 @@ def unload_if_low_vram(model):
        return model.cpu()
    return model

+def get_torch_device():
+    if vram_state == CPU:
+        return torch.device("cpu")
+    else:
+        return torch.cuda.current_device()

+def get_autocast_device(dev):
+    if hasattr(dev, 'type'):
+        return dev.type
+    return "cuda"

def get_free_memory(dev=None, torch_free_too=False):
    if dev is None:
-        dev = torch.cuda.current_device()
+        dev = get_torch_device()
    if hasattr(dev, 'type') and dev.type == 'cpu':
        mem_free_total = psutil.virtual_memory().available
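
Note: the new get_torch_device() / get_autocast_device() helpers centralize device selection so callers no longer hard-code CUDA. A minimal usage sketch (the tensor below is illustrative, not part of this commit):

import torch
import model_management  # ComfyUI's model_management module at the repo root

dev = model_management.get_torch_device()           # torch.device("cpu") under --cpu, otherwise the current CUDA device index
latent = torch.zeros(1, 4, 64, 64).to(dev)          # stand-in tensor, moved the same way nodes.py moves latents
with torch.autocast(model_management.get_autocast_device(dev)):
    pass                                            # the model forward pass would run here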


@@ -438,7 +438,7 @@ class KSampler:
        else:
            max_denoise = True

-        with precision_scope(self.device):
+        with precision_scope(model_management.get_autocast_device(self.device)):
            if self.sampler == "uni_pc":
                samples = uni_pc.sample_unipc(self.model_wrap, noise, latent_image, sigmas, sampling_function=sampling_function, max_denoise=max_denoise, extra_args=extra_args, noise_mask=denoise_mask)
            elif self.sampler == "uni_pc_bh2":


@@ -266,7 +266,7 @@ class CLIP:
        self.cond_stage_model = clip(**(params))
        self.tokenizer = tokenizer(embedding_directory=embedding_directory)
        self.patcher = ModelPatcher(self.cond_stage_model)
-        self.layer_idx = -1
+        self.layer_idx = None

    def clone(self):
        n = CLIP(no_init=True)

@@ -287,7 +287,8 @@ class CLIP:
        self.layer_idx = layer_idx

    def encode(self, text):
-        self.cond_stage_model.clip_layer(self.layer_idx)
+        if self.layer_idx is not None:
+            self.cond_stage_model.clip_layer(self.layer_idx)
        tokens = self.tokenizer.tokenize_with_weights(text)
        try:
            self.patcher.patch_model()

@@ -299,7 +300,7 @@ class CLIP:
        return cond

class VAE:
-    def __init__(self, ckpt_path=None, scale_factor=0.18215, device="cuda", config=None):
+    def __init__(self, ckpt_path=None, scale_factor=0.18215, device=None, config=None):
        if config is None:
            #default SD1.x/SD2.x VAE parameters
            ddconfig = {'double_z': True, 'z_channels': 4, 'resolution': 256, 'in_channels': 3, 'out_ch': 3, 'ch': 128, 'ch_mult': [1, 2, 4, 4], 'num_res_blocks': 2, 'attn_resolutions': [], 'dropout': 0.0}

@@ -308,6 +309,8 @@ class VAE:
            self.first_stage_model = AutoencoderKL(**(config['params']), ckpt_path=ckpt_path)
        self.first_stage_model = self.first_stage_model.eval()
        self.scale_factor = scale_factor
+        if device is None:
+            device = model_management.get_torch_device()
        self.device = device

    def decode(self, samples):

@@ -381,11 +384,13 @@ def resize_image_to(tensor, target_latent_tensor, batched_number):
    return torch.cat([tensor] * batched_number, dim=0)

class ControlNet:
-    def __init__(self, control_model, device="cuda"):
+    def __init__(self, control_model, device=None):
        self.control_model = control_model
        self.cond_hint_original = None
        self.cond_hint = None
        self.strength = 1.0
+        if device is None:
+            device = model_management.get_torch_device()
        self.device = device
        self.previous_controlnet = None

@@ -406,7 +411,7 @@ class ControlNet:
        else:
            precision_scope = contextlib.nullcontext

-        with precision_scope(self.device):
+        with precision_scope(model_management.get_autocast_device(self.device)):
            self.control_model = model_management.load_if_low_vram(self.control_model)
            control = self.control_model(x=x_noisy, hint=self.cond_hint, timesteps=t, context=cond_txt)
            self.control_model = model_management.unload_if_low_vram(self.control_model)

@@ -481,7 +486,7 @@ def load_controlnet(ckpt_path, model=None):
        context_dim = controlnet_data[key].shape[1]

    use_fp16 = False
-    if controlnet_data[key].dtype == torch.float16:
+    if model_management.should_use_fp16() and controlnet_data[key].dtype == torch.float16:
        use_fp16 = True

    control_model = cldm.ControlNet(image_size=32,

@@ -527,10 +532,12 @@ def load_controlnet(ckpt_path, model=None):
    return control

class T2IAdapter:
-    def __init__(self, t2i_model, channels_in, device="cuda"):
+    def __init__(self, t2i_model, channels_in, device=None):
        self.t2i_model = t2i_model
        self.channels_in = channels_in
        self.strength = 1.0
+        if device is None:
+            device = model_management.get_torch_device()
        self.device = device
        self.previous_controlnet = None
        self.control_input = None

@@ -613,11 +620,7 @@ class T2IAdapter:
def load_t2i_adapter(ckpt_path, model=None):
    t2i_data = load_torch_file(ckpt_path)
    keys = t2i_data.keys()
-    if "style_embedding" in keys:
-        pass
-        # TODO
-        # model_ad = adapter.StyleAdapter(width=1024, context_dim=768, num_head=8, n_layes=3, num_token=8)
-    elif "body.0.in_conv.weight" in keys:
+    if "body.0.in_conv.weight" in keys:
        cin = t2i_data['body.0.in_conv.weight'].shape[1]
        model_ad = adapter.Adapter_light(cin=cin, channels=[320, 640, 1280, 1280], nums_rb=4)
    else:

@@ -626,6 +629,26 @@ def load_t2i_adapter(ckpt_path, model=None):
    model_ad.load_state_dict(t2i_data)
    return T2IAdapter(model_ad, cin // 64)

+class StyleModel:
+    def __init__(self, model, device="cpu"):
+        self.model = model
+
+    def get_cond(self, input):
+        return self.model(input.last_hidden_state)
+
+def load_style_model(ckpt_path):
+    model_data = load_torch_file(ckpt_path)
+    keys = model_data.keys()
+    if "style_embedding" in keys:
+        model = adapter.StyleAdapter(width=1024, context_dim=768, num_head=8, n_layes=3, num_token=8)
+    else:
+        raise Exception("invalid style model {}".format(ckpt_path))
+    model.load_state_dict(model_data)
+    return StyleModel(model)

def load_clip(ckpt_path, embedding_directory=None):
    clip_data = load_torch_file(ckpt_path)
    config = {}
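
Note: the new StyleModel wraps TencentARC's style adapter; load_style_model() instantiates adapter.StyleAdapter when the checkpoint contains a "style_embedding" key, and get_cond() projects a CLIP vision encoder's last_hidden_state into extra conditioning tokens. A minimal sketch, assuming the style checkpoint downloaded by the notebook cell at the end of this commit and using a zero tensor as a stand-in for a real vision output:

import types
import torch
import comfy.sd

style_model = comfy.sd.load_style_model("models/style_models/t2iadapter_style_sd14v1.pth")
# Stand-in for a ClipVisionModel.encode_image() result (ViT-L/14: 257 tokens of width 1024).
vision_output = types.SimpleNamespace(last_hidden_state=torch.zeros(1, 257, 1024))
style_tokens = style_model.get_cond(vision_output)   # extra conditioning tokens with context_dim 768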


@@ -0,0 +1,32 @@
from transformers import CLIPVisionModel, CLIPVisionConfig, CLIPImageProcessor
from comfy.sd import load_torch_file
import os

class ClipVisionModel():
    def __init__(self):
        json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config.json")
        config = CLIPVisionConfig.from_json_file(json_config)
        self.model = CLIPVisionModel(config)
        self.processor = CLIPImageProcessor(crop_size=224,
                                            do_center_crop=True,
                                            do_convert_rgb=True,
                                            do_normalize=True,
                                            do_resize=True,
                                            image_mean=[0.48145466, 0.4578275, 0.40821073],
                                            image_std=[0.26862954, 0.26130258, 0.27577711],
                                            resample=3, #bicubic
                                            size=224)

    def load_sd(self, sd):
        self.model.load_state_dict(sd, strict=False)

    def encode_image(self, image):
        inputs = self.processor(images=[image[0]], return_tensors="pt")
        outputs = self.model(**inputs)
        return outputs

def load(ckpt_path):
    clip_data = load_torch_file(ckpt_path)
    clip = ClipVisionModel()
    clip.load_sd(clip_data)
    return clip
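
Note: this module wraps the Hugging Face CLIP vision tower with a fixed ViT-L/14 preprocessing config. A minimal usage sketch (the .bin path matches the notebook cell at the end of this commit; the zero image is only a placeholder for a loaded image in [batch, height, width, channel] layout):

import torch
import comfy_extras.clip_vision

clip_vision = comfy_extras.clip_vision.load("models/clip_vision/clip_vit14.bin")
image = torch.zeros(1, 512, 512, 3)        # placeholder IMAGE tensor; real ones come from a LoadImage node
outputs = clip_vision.encode_image(image)
print(outputs.last_hidden_state.shape)     # expected: torch.Size([1, 257, 1024])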


@@ -0,0 +1,23 @@
{
"_name_or_path": "openai/clip-vit-large-patch14",
"architectures": [
"CLIPVisionModel"
],
"attention_dropout": 0.0,
"dropout": 0.0,
"hidden_act": "quick_gelu",
"hidden_size": 1024,
"image_size": 224,
"initializer_factor": 1.0,
"initializer_range": 0.02,
"intermediate_size": 4096,
"layer_norm_eps": 1e-05,
"model_type": "clip_vision_model",
"num_attention_heads": 16,
"num_channels": 3,
"num_hidden_layers": 24,
"patch_size": 14,
"projection_dim": 768,
"torch_dtype": "float32",
"transformers_version": "4.24.0"
}


@ -24,6 +24,7 @@ if __name__ == "__main__":
print("\t--lowvram\t\t\tSplit the unet in parts to use less vram.") print("\t--lowvram\t\t\tSplit the unet in parts to use less vram.")
print("\t--novram\t\t\tWhen lowvram isn't enough.") print("\t--novram\t\t\tWhen lowvram isn't enough.")
print() print()
print("\t--cpu\t\t\tTo use the CPU for everything (slow).")
exit() exit()
if '--dont-upcast-attention' in sys.argv: if '--dont-upcast-attention' in sys.argv:

nodes.py

@@ -18,6 +18,8 @@ import comfy.samplers
import comfy.sd
import comfy.utils
+import comfy_extras.clip_vision

import model_management
import importlib
@@ -370,6 +372,76 @@ class CLIPLoader:
        clip = comfy.sd.load_clip(ckpt_path=clip_path, embedding_directory=CheckpointLoader.embedding_directory)
        return (clip,)

+class CLIPVisionLoader:
+    models_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "models")
+    clip_dir = os.path.join(models_dir, "clip_vision")
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": { "clip_name": (filter_files_extensions(recursive_search(s.clip_dir), supported_pt_extensions), ),
+                             }}
+    RETURN_TYPES = ("CLIP_VISION",)
+    FUNCTION = "load_clip"
+
+    CATEGORY = "loaders"
+
+    def load_clip(self, clip_name):
+        clip_path = os.path.join(self.clip_dir, clip_name)
+        clip_vision = comfy_extras.clip_vision.load(clip_path)
+        return (clip_vision,)
+
+class CLIPVisionEncode:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": { "clip_vision": ("CLIP_VISION",),
+                              "image": ("IMAGE",)
+                             }}
+    RETURN_TYPES = ("CLIP_VISION_OUTPUT",)
+    FUNCTION = "encode"
+
+    CATEGORY = "conditioning/style_model"
+
+    def encode(self, clip_vision, image):
+        output = clip_vision.encode_image(image)
+        return (output,)
+
+class StyleModelLoader:
+    models_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "models")
+    style_model_dir = os.path.join(models_dir, "style_models")
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": {"style_model_name": (filter_files_extensions(recursive_search(s.style_model_dir), supported_pt_extensions), )}}
+
+    RETURN_TYPES = ("STYLE_MODEL",)
+    FUNCTION = "load_style_model"
+
+    CATEGORY = "loaders"
+
+    def load_style_model(self, style_model_name):
+        style_model_path = os.path.join(self.style_model_dir, style_model_name)
+        style_model = comfy.sd.load_style_model(style_model_path)
+        return (style_model,)
+
+class StyleModelApply:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": {"conditioning": ("CONDITIONING", ),
+                             "style_model": ("STYLE_MODEL", ),
+                             "clip_vision_output": ("CLIP_VISION_OUTPUT", ),
+                             }}
+    RETURN_TYPES = ("CONDITIONING",)
+    FUNCTION = "apply_stylemodel"
+
+    CATEGORY = "conditioning/style_model"
+
+    def apply_stylemodel(self, clip_vision_output, style_model, conditioning):
+        cond = style_model.get_cond(clip_vision_output)
+        c = []
+        for t in conditioning:
+            n = [torch.cat((t[0], cond), dim=1), t[1].copy()]
+            c.append(n)
+        return (c, )

class EmptyLatentImage:
    def __init__(self, device="cpu"):
        self.device = device
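
Note: the four new nodes chain as CLIPVisionLoader -> CLIPVisionEncode -> StyleModelApply, with StyleModelLoader supplying the style weights; apply_stylemodel appends the style tokens to each prompt's token tensor along dim=1. A minimal sketch of driving them directly from Python (the placeholder image and conditioning stand in for LoadImage / CLIPTextEncode outputs; file names are illustrative):

import torch

clip_vision = CLIPVisionLoader().load_clip("clip_vit14.bin")[0]
style_model = StyleModelLoader().load_style_model("t2iadapter_style_sd14v1.pth")[0]

image = torch.zeros(1, 512, 512, 3)              # placeholder for a LoadImage output
conditioning = [[torch.zeros(1, 77, 768), {}]]   # placeholder for a CLIPTextEncode output
vision_output = CLIPVisionEncode().encode(clip_vision, image)[0]
styled = StyleModelApply().apply_stylemodel(vision_output, style_model, conditioning)[0]
# styled[0][0] now holds the original 77 text tokens plus the appended style tokens.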
@@ -419,7 +491,7 @@ class LatentRotate:
    RETURN_TYPES = ("LATENT",)
    FUNCTION = "rotate"

-    CATEGORY = "latent"
+    CATEGORY = "latent/transform"

    def rotate(self, samples, rotation):
        s = samples.copy()

@@ -443,7 +515,7 @@ class LatentFlip:
    RETURN_TYPES = ("LATENT",)
    FUNCTION = "flip"

-    CATEGORY = "latent"
+    CATEGORY = "latent/transform"

    def flip(self, samples, flip_method):
        s = samples.copy()

@@ -508,7 +580,7 @@ class LatentCrop:
    RETURN_TYPES = ("LATENT",)
    FUNCTION = "crop"

-    CATEGORY = "latent"
+    CATEGORY = "latent/transform"

    def crop(self, samples, width, height, x, y):
        s = samples.copy()

@@ -556,9 +628,10 @@ class SetLatentNoiseMask:
        return (s,)

-def common_ksampler(device, model, seed, steps, cfg, sampler_name, scheduler, positive, negative, latent, denoise=1.0, disable_noise=False, start_step=None, last_step=None, force_full_denoise=False):
+def common_ksampler(model, seed, steps, cfg, sampler_name, scheduler, positive, negative, latent, denoise=1.0, disable_noise=False, start_step=None, last_step=None, force_full_denoise=False):
    latent_image = latent["samples"]
    noise_mask = None
+    device = model_management.get_torch_device()

    if disable_noise:
        noise = torch.zeros(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, device="cpu")

@@ -574,12 +647,9 @@ def common_ksampler(device, model, seed, steps, cfg, sampler_name, scheduler, po
        noise_mask = noise_mask.to(device)

    real_model = None
-    if device != "cpu":
-        model_management.load_model_gpu(model)
-        real_model = model.model
-    else:
-        #TODO: cpu support
-        real_model = model.patch_model()
+    model_management.load_model_gpu(model)
+    real_model = model.model

    noise = noise.to(device)
    latent_image = latent_image.to(device)

@@ -625,9 +695,6 @@ def common_ksampler(device, model, seed, steps, cfg, sampler_name, scheduler, po
    return (out, )

class KSampler:
-    def __init__(self, device="cuda"):
-        self.device = device
    @classmethod
    def INPUT_TYPES(s):
        return {"required":

@@ -649,12 +716,9 @@ class KSampler:
    CATEGORY = "sampling"

    def sample(self, model, seed, steps, cfg, sampler_name, scheduler, positive, negative, latent_image, denoise=1.0):
-        return common_ksampler(self.device, model, seed, steps, cfg, sampler_name, scheduler, positive, negative, latent_image, denoise=denoise)
+        return common_ksampler(model, seed, steps, cfg, sampler_name, scheduler, positive, negative, latent_image, denoise=denoise)

class KSamplerAdvanced:
-    def __init__(self, device="cuda"):
-        self.device = device
    @classmethod
    def INPUT_TYPES(s):
        return {"required":

@@ -685,7 +749,7 @@ class KSamplerAdvanced:
        disable_noise = False
        if add_noise == "disable":
            disable_noise = True
-        return common_ksampler(self.device, model, noise_seed, steps, cfg, sampler_name, scheduler, positive, negative, latent_image, denoise=denoise, disable_noise=disable_noise, start_step=start_at_step, last_step=end_at_step, force_full_denoise=force_full_denoise)
+        return common_ksampler(model, noise_seed, steps, cfg, sampler_name, scheduler, positive, negative, latent_image, denoise=denoise, disable_noise=disable_noise, start_step=start_at_step, last_step=end_at_step, force_full_denoise=force_full_denoise)

class SaveImage:
    def __init__(self):

@@ -866,10 +930,14 @@ NODE_CLASS_MAPPINGS = {
    "LatentCrop": LatentCrop,
    "LoraLoader": LoraLoader,
    "CLIPLoader": CLIPLoader,
+    "CLIPVisionEncode": CLIPVisionEncode,
+    "StyleModelApply": StyleModelApply,
    "ControlNetApply": ControlNetApply,
    "ControlNetLoader": ControlNetLoader,
    "DiffControlNetLoader": DiffControlNetLoader,
    "T2IAdapterLoader": T2IAdapterLoader,
+    "StyleModelLoader": StyleModelLoader,
+    "CLIPVisionLoader": CLIPVisionLoader,
    "VAEDecodeTiled": VAEDecodeTiled,
}


@@ -35,8 +35,7 @@
  "source": [
    "!git clone https://github.com/comfyanonymous/ComfyUI\n",
    "%cd ComfyUI\n",
-    "!pip install xformers -r requirements.txt\n",
-    "!sed -i 's/v1-inference.yaml/v1-inference_fp16.yaml/g' webshit/index.html"
+    "!pip install xformers -r requirements.txt"
  ]
 },
 {

@@ -89,6 +88,11 @@
    "#!wget -c https://huggingface.co/TencentARC/T2I-Adapter/resolve/main/models/t2iadapter_color_sd14v1.pth -P ./models/t2i_adapter/\n",
    "#!wget -c https://huggingface.co/TencentARC/T2I-Adapter/resolve/main/models/t2iadapter_canny_sd14v1.pth -P ./models/t2i_adapter/\n",
    "\n",
+    "# T2I Styles Model\n",
+    "#!wget -c https://huggingface.co/TencentARC/T2I-Adapter/resolve/main/models/t2iadapter_style_sd14v1.pth -P ./models/style_models/\n",
+    "\n",
+    "# CLIPVision model (needed for styles model)\n",
+    "#!wget -c https://huggingface.co/openai/clip-vit-large-patch14/resolve/main/pytorch_model.bin -O ./models/clip_vision/clip_vit14.bin\n",
    "\n",
    "\n",
    "# ControlNet\n",