From 983ebc579212e209f52dff014b79bfe1932c0959 Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Tue, 28 Nov 2023 04:58:32 -0500
Subject: [PATCH] Use smart model management for VAE to decrease latency.

---
 comfy/sd.py | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/comfy/sd.py b/comfy/sd.py
index 53c79e1c..f4f84d0a 100644
--- a/comfy/sd.py
+++ b/comfy/sd.py
@@ -187,10 +187,12 @@ class VAE:
         if device is None:
             device = model_management.vae_device()
         self.device = device
-        self.offload_device = model_management.vae_offload_device()
+        offload_device = model_management.vae_offload_device()
         self.vae_dtype = model_management.vae_dtype()
         self.first_stage_model.to(self.vae_dtype)
 
+        self.patcher = comfy.model_patcher.ModelPatcher(self.first_stage_model, load_device=self.device, offload_device=offload_device)
+
     def decode_tiled_(self, samples, tile_x=64, tile_y=64, overlap = 16):
         steps = samples.shape[0] * comfy.utils.get_tiled_scale_steps(samples.shape[3], samples.shape[2], tile_x, tile_y, overlap)
         steps += samples.shape[0] * comfy.utils.get_tiled_scale_steps(samples.shape[3], samples.shape[2], tile_x // 2, tile_y * 2, overlap)
@@ -219,10 +221,9 @@ class VAE:
         return samples
 
     def decode(self, samples_in):
-        self.first_stage_model = self.first_stage_model.to(self.device)
         try:
             memory_used = self.memory_used_decode(samples_in.shape, self.vae_dtype)
-            model_management.free_memory(memory_used, self.device)
+            model_management.load_models_gpu([self.patcher], memory_required=memory_used)
             free_memory = model_management.get_free_memory(self.device)
             batch_number = int(free_memory / memory_used)
             batch_number = max(1, batch_number)
@@ -235,22 +236,19 @@ class VAE:
             print("Warning: Ran out of memory when regular VAE decoding, retrying with tiled VAE decoding.")
             pixel_samples = self.decode_tiled_(samples_in)
 
-        self.first_stage_model = self.first_stage_model.to(self.offload_device)
         pixel_samples = pixel_samples.cpu().movedim(1,-1)
         return pixel_samples
 
     def decode_tiled(self, samples, tile_x=64, tile_y=64, overlap = 16):
-        self.first_stage_model = self.first_stage_model.to(self.device)
+        model_management.load_model_gpu(self.patcher)
         output = self.decode_tiled_(samples, tile_x, tile_y, overlap)
-        self.first_stage_model = self.first_stage_model.to(self.offload_device)
         return output.movedim(1,-1)
 
     def encode(self, pixel_samples):
-        self.first_stage_model = self.first_stage_model.to(self.device)
         pixel_samples = pixel_samples.movedim(-1,1)
         try:
             memory_used = self.memory_used_encode(pixel_samples.shape, self.vae_dtype)
-            model_management.free_memory(memory_used, self.device)
+            model_management.load_models_gpu([self.patcher], memory_required=memory_used)
             free_memory = model_management.get_free_memory(self.device)
             batch_number = int(free_memory / memory_used)
             batch_number = max(1, batch_number)
@@ -263,14 +261,12 @@ class VAE:
             print("Warning: Ran out of memory when regular VAE encoding, retrying with tiled VAE encoding.")
             samples = self.encode_tiled_(pixel_samples)
 
-        self.first_stage_model = self.first_stage_model.to(self.offload_device)
         return samples
 
     def encode_tiled(self, pixel_samples, tile_x=512, tile_y=512, overlap = 64):
-        self.first_stage_model = self.first_stage_model.to(self.device)
+        model_management.load_model_gpu(self.patcher)
         pixel_samples = pixel_samples.movedim(-1,1)
         samples = self.encode_tiled_(pixel_samples, tile_x=tile_x, tile_y=tile_y, overlap=overlap)
-        self.first_stage_model = self.first_stage_model.to(self.offload_device)
         return samples
 
     def get_sd(self):
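
Note (not part of the patch): the diff above drops the per-call `first_stage_model.to(device)` / `.to(offload_device)` shuttling and instead registers the VAE once with ComfyUI's model management through a `comfy.model_patcher.ModelPatcher`, so the weights can stay resident between encode/decode calls and are only evicted when another load needs the memory. The sketch below only illustrates that general pattern; `LazyModelHandle`, `SimpleModelManager`, and their eviction policy are hypothetical stand-ins, not ComfyUI's implementation (the real code bases its decisions on measured free memory, see `model_management.get_free_memory` in the diff).

    import torch
    import torch.nn as nn

    class LazyModelHandle:
        # Hypothetical stand-in for comfy.model_patcher.ModelPatcher: it records a
        # load device and an offload device for one module, so callers never call
        # .to(device) on the module directly.
        def __init__(self, model: nn.Module, load_device: str, offload_device: str):
            self.model = model
            self.load_device = load_device
            self.offload_device = offload_device

    class SimpleModelManager:
        # Minimal sketch of "smart" model management: models stay on their load
        # device after use and are only moved back to the offload device when a
        # later load declares it needs room. Illustrative policy only.
        def __init__(self):
            self._resident = []

        def load_models_gpu(self, handles, memory_required: int = 0):
            # Rough eviction policy: if the caller declares a memory requirement,
            # offload every resident model that is not part of this request.
            if memory_required > 0:
                for old in self._resident:
                    if old not in handles:
                        old.model.to(old.offload_device)
                self._resident = [h for h in self._resident if h in handles]
            for h in handles:
                h.model.to(h.load_device)
                if h not in self._resident:
                    self._resident.append(h)

        def load_model_gpu(self, handle):
            self.load_models_gpu([handle])

    # Usage: wrap the model once at construction time; encode()/decode() then just
    # ask the manager to make it resident instead of moving it back and forth.
    manager = SimpleModelManager()
    vae = nn.Conv2d(4, 3, 1)                       # toy stand-in for first_stage_model
    vae_handle = LazyModelHandle(vae, load_device="cpu", offload_device="cpu")
    manager.load_model_gpu(vae_handle)             # stays resident across repeated calls

The latency win in the commit subject comes from exactly this: repeated VAE decodes no longer pay a host-to-device transfer every call, because offloading happens only under memory pressure instead of unconditionally at the end of each method.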