diff --git a/comfy/samplers.py b/comfy/samplers.py index cc750e96..f4faa4cf 100644 --- a/comfy/samplers.py +++ b/comfy/samplers.py @@ -9,7 +9,7 @@ class CFGDenoiser(torch.nn.Module): self.inner_model = model def forward(self, x, sigma, uncond, cond, cond_scale): - if len(uncond[0]) == len(cond[0]) and x.shape[0] * x.shape[2] * x.shape[3] <= (96 * 96): #TODO check memory instead + if len(uncond[0]) == len(cond[0]) and x.shape[0] * x.shape[2] * x.shape[3] < (96 * 96): #TODO check memory instead x_in = torch.cat([x] * 2) sigma_in = torch.cat([sigma] * 2) cond_in = torch.cat([uncond, cond]) @@ -19,6 +19,61 @@ class CFGDenoiser(torch.nn.Module): uncond = self.inner_model(x, sigma, cond=uncond) return uncond + (cond - uncond) * cond_scale +class CFGDenoiserComplex(torch.nn.Module): + def __init__(self, model): + super().__init__() + self.inner_model = model + def forward(self, x, sigma, uncond, cond, cond_scale): + def calc_cond(cond, x_in, sigma): + out_cond = torch.zeros_like(x_in) + out_count = torch.ones_like(x_in)/100000.0 + sigma_cmp = sigma[0] + + for x in cond: + area = (x_in.shape[2], x_in.shape[3], 0, 0) + strength = 1.0 + min_sigma = 0.0 + max_sigma = 999.0 + if 'area' in x[1]: + area = x[1]['area'] + if 'strength' in x[1]: + strength = x[1]['strength'] + if 'min_sigma' in x[1]: + min_sigma = x[1]['min_sigma'] + if 'max_sigma' in x[1]: + max_sigma = x[1]['max_sigma'] + if sigma_cmp < min_sigma or sigma_cmp > max_sigma: + continue + input_x = x_in[:,:,area[2]:area[0] + area[2],area[3]:area[1] + area[3]] + mult = torch.ones_like(input_x) * strength + + rr = 8 + if area[2] != 0: + for t in range(rr): + mult[:,:,area[2]+t:area[2]+1+t,:] *= ((1.0/rr) * (t + 1)) + if (area[0] + area[2]) < x_in.shape[2]: + for t in range(rr): + mult[:,:,area[0] + area[2] - 1 - t:area[0] + area[2] - t,:] *= ((1.0/rr) * (t + 1)) + if area[3] != 0: + for t in range(rr): + mult[:,:,:,area[3]+t:area[3]+1+t] *= ((1.0/rr) * (t + 1)) + if (area[1] + area[3]) < x_in.shape[3]: + for t in range(rr): + mult[:,:,:,area[1] + area[3] - 1 - t:area[1] + area[3] - t] *= ((1.0/rr) * (t + 1)) + + out_cond[:,:,area[2]:area[0] + area[2],area[3]:area[1] + area[3]] += self.inner_model(input_x, sigma, cond=x[0]) * mult + out_count[:,:,area[2]:area[0] + area[2],area[3]:area[1] + area[3]] += mult + del input_x + del mult + + out_cond /= out_count + del out_count + return out_cond + + cond = calc_cond(cond, x, sigma) + uncond = calc_cond(uncond, x, sigma) + + return uncond + (cond - uncond) * cond_scale def simple_scheduler(model, steps): sigs = [] @@ -28,6 +83,35 @@ def simple_scheduler(model, steps): sigs += [0.0] return torch.FloatTensor(sigs) +def create_cond_with_same_area_if_none(conds, c): + if 'area' not in c[1]: + return + + c_area = c[1]['area'] + smallest = None + for x in conds: + if 'area' in x[1]: + a = x[1]['area'] + if c_area[2] >= a[2] and c_area[3] >= a[3]: + if a[0] + a[2] >= c_area[0] + c_area[2]: + if a[1] + a[3] >= c_area[1] + c_area[3]: + if smallest is None: + smallest = x + elif 'area' not in smallest[1]: + smallest = x + else: + if smallest[1]['area'][0] * smallest[1]['area'][1] > a[0] * a[1]: + smallest = x + else: + if smallest is None: + smallest = x + if smallest is None: + return + if 'area' in smallest[1]: + if smallest[1]['area'] == c_area: + return + n = c[1].copy() + conds += [[smallest[0], n]] class KSampler: SCHEDULERS = ["karras", "normal", "simple"] @@ -41,7 +125,7 @@ class KSampler: self.model_wrap = k_diffusion.external.CompVisVDenoiser(self.model, quantize=True) else: self.model_wrap = k_diffusion.external.CompVisDenoiser(self.model, quantize=True) - self.model_k = CFGDenoiser(self.model_wrap) + self.model_k = CFGDenoiserComplex(self.model_wrap) self.device = device if scheduler not in self.SCHEDULERS: scheduler = self.SCHEDULERS[0] @@ -94,11 +178,18 @@ class KSampler: if start_step is not None: sigmas = sigmas[start_step:] - noise *= sigmas[0] if latent_image is not None: noise += latent_image + positive = positive[:] + negative = negative[:] + #make sure each cond area has an opposite one with the same area + for c in positive: + create_cond_with_same_area_if_none(negative, c) + for c in negative: + create_cond_with_same_area_if_none(positive, c) + if self.model.model.diffusion_model.dtype == torch.float16: precision_scope = torch.autocast else: diff --git a/nodes.py b/nodes.py index 578f6c4e..11a78112 100644 --- a/nodes.py +++ b/nodes.py @@ -4,6 +4,7 @@ import os import sys import json import hashlib +import copy from PIL import Image from PIL.PngImagePlugin import PngInfo @@ -33,7 +34,39 @@ class CLIPTextEncode: FUNCTION = "encode" def encode(self, clip, text): - return (clip.encode(text), ) + return ([[clip.encode(text), {}]], ) + +class ConditioningCombine: + @classmethod + def INPUT_TYPES(s): + return {"required": {"conditioning_1": ("CONDITIONING", ), "conditioning_2": ("CONDITIONING", )}} + RETURN_TYPES = ("CONDITIONING",) + FUNCTION = "combine" + + def combine(self, conditioning_1, conditioning_2): + return (conditioning_1 + conditioning_2, ) + +class ConditioningSetArea: + @classmethod + def INPUT_TYPES(s): + return {"required": {"conditioning": ("CONDITIONING", ), + "width": ("INT", {"default": 64, "min": 64, "max": 4096, "step": 64}), + "height": ("INT", {"default": 64, "min": 64, "max": 4096, "step": 64}), + "x": ("INT", {"default": 0, "min": 0, "max": 4096, "step": 64}), + "y": ("INT", {"default": 0, "min": 0, "max": 4096, "step": 64}), + "strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}), + }} + RETURN_TYPES = ("CONDITIONING",) + FUNCTION = "append" + + def append(self, conditioning, width, height, x, y, strength, min_sigma=0.0, max_sigma=99.0): + c = copy.deepcopy(conditioning) + for t in c: + t[1]['area'] = (height // 8, width // 8, y // 8, x // 8) + t[1]['strength'] = strength + t[1]['min_sigma'] = min_sigma + t[1]['max_sigma'] = max_sigma + return (c, ) class VAEDecode: def __init__(self, device="cpu"): @@ -172,14 +205,21 @@ class KSampler: noise = noise.to(self.device) latent_image = latent_image.to(self.device) - if positive.shape[0] < noise.shape[0]: - positive = torch.cat([positive] * noise.shape[0]) + positive_copy = [] + negative_copy = [] - if negative.shape[0] < noise.shape[0]: - negative = torch.cat([negative] * noise.shape[0]) - - positive = positive.to(self.device) - negative = negative.to(self.device) + for p in positive: + t = p[0] + if t.shape[0] < noise.shape[0]: + t = torch.cat([t] * noise.shape[0]) + t = t.to(self.device) + positive_copy += [[t] + p[1:]] + for n in negative: + t = n[0] + if t.shape[0] < noise.shape[0]: + t = torch.cat([t] * noise.shape[0]) + t = t.to(self.device) + negative_copy += [[t] + n[1:]] if sampler_name in comfy.samplers.KSampler.SAMPLERS: sampler = comfy.samplers.KSampler(model, steps=steps, device=self.device, sampler=sampler_name, scheduler=scheduler, denoise=denoise) @@ -187,7 +227,7 @@ class KSampler: #other samplers pass - samples = sampler.sample(noise, positive, negative, cfg=cfg, latent_image=latent_image) + samples = sampler.sample(noise, positive_copy, negative_copy, cfg=cfg, latent_image=latent_image) samples = samples.cpu() model = model.cpu() return (samples, ) @@ -272,7 +312,9 @@ NODE_CLASS_MAPPINGS = { "EmptyLatentImage": EmptyLatentImage, "LatentUpscale": LatentUpscale, "SaveImage": SaveImage, - "LoadImage": LoadImage + "LoadImage": LoadImage, + "ConditioningCombine": ConditioningCombine, + "ConditioningSetArea": ConditioningSetArea, }