diff --git a/comfy/sd.py b/comfy/sd.py index 06d6c1a5..87b380b1 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -438,10 +438,8 @@ class VAE: self.device = device def decode_tiled_(self, samples, tile_x=64, tile_y=64, overlap = 16): - it_1 = -(samples.shape[2] // -(tile_y * 2 - overlap)) * -(samples.shape[3] // -(tile_x // 2 - overlap)) - it_2 = -(samples.shape[2] // -(tile_y // 2 - overlap)) * -(samples.shape[3] // -(tile_x * 2 - overlap)) - it_3 = -(samples.shape[2] // -(tile_y - overlap)) * -(samples.shape[3] // -(tile_x - overlap)) - pbar = tqdm(total=samples.shape[0] * (it_1 + it_2 + it_3)) + steps = samples.shape[0] * utils.get_tiled_scale_steps(samples.shape[3], samples.shape[2], tile_x, tile_y, overlap) + pbar = tqdm(total=steps) decode_fn = lambda a: (self.first_stage_model.decode(1. / self.scale_factor * a.to(self.device)) + 1.0) output = torch.clamp(( @@ -492,11 +490,9 @@ class VAE: self.first_stage_model = self.first_stage_model.to(self.device) pixel_samples = pixel_samples.movedim(-1,1).to(self.device) - it_1 = -(pixel_samples.shape[2] // -(tile_y * 2 - overlap)) * -(pixel_samples.shape[3] // -(tile_x // 2 - overlap)) - it_2 = -(pixel_samples.shape[2] // -(tile_y // 2 - overlap)) * -(pixel_samples.shape[3] // -(tile_x * 2 - overlap)) - it_3 = -(pixel_samples.shape[2] // -(tile_y - overlap)) * -(pixel_samples.shape[3] // -(tile_x - overlap)) - pbar = tqdm(total=(it_1 + it_2 + it_3)) - + steps = utils.get_tiled_scale_steps(pixel_samples.shape[3], pixel_samples.shape[2], tile_x, tile_y, overlap) + pbar = tqdm(total=steps) + samples = utils.tiled_scale(pixel_samples, lambda a: self.first_stage_model.encode(2. * a - 1.).sample() * self.scale_factor, tile_x, tile_y, overlap, upscale_amount = (1/8), out_channels=4, pbar=pbar) samples += utils.tiled_scale(pixel_samples, lambda a: self.first_stage_model.encode(2. * a - 1.).sample() * self.scale_factor, tile_x * 2, tile_y // 2, overlap, upscale_amount = (1/8), out_channels=4, pbar=pbar) samples += utils.tiled_scale(pixel_samples, lambda a: self.first_stage_model.encode(2. * a - 1.).sample() * self.scale_factor, tile_x // 2, tile_y * 2, overlap, upscale_amount = (1/8), out_channels=4, pbar=pbar) diff --git a/comfy/utils.py b/comfy/utils.py index c7c6a08c..82d3aa0d 100644 --- a/comfy/utils.py +++ b/comfy/utils.py @@ -62,6 +62,12 @@ def common_upscale(samples, width, height, upscale_method, crop): s = samples return torch.nn.functional.interpolate(s, size=(height, width), mode=upscale_method) +def get_tiled_scale_steps(width, height, tile_x, tile_y, overlap): + it_1 = -(height // -(tile_y * 2 - overlap)) * -(width // -(tile_x // 2 - overlap)) + it_2 = -(height // -(tile_y // 2 - overlap)) * -(width // -(tile_x * 2 - overlap)) + it_3 = -(height // -(tile_y - overlap)) * -(width // -(tile_x - overlap)) + return it_1 + it_2 + it_3 + @torch.inference_mode() def tiled_scale(samples, function, tile_x=64, tile_y=64, overlap = 8, upscale_amount = 4, out_channels = 3, pbar = None): output = torch.empty((samples.shape[0], out_channels, round(samples.shape[2] * upscale_amount), round(samples.shape[3] * upscale_amount)), device="cpu")