diff --git a/comfy/model_management.py b/comfy/model_management.py
index 4e0e6a0a..4f3f2857 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -216,6 +216,11 @@ current_gpu_controlnets = []
 
 model_accelerated = False
 
+def unet_offload_device():
+    if vram_state == VRAMState.HIGH_VRAM or vram_state == VRAMState.SHARED:
+        return get_torch_device()
+    else:
+        return torch.device("cpu")
 
 def unload_model():
     global current_loaded_model
@@ -228,10 +233,9 @@ def unload_model():
             accelerate.hooks.remove_hook_from_submodules(current_loaded_model.model)
             model_accelerated = False
 
-        #never unload models from GPU on high vram
-        if vram_state != VRAMState.HIGH_VRAM:
-            current_loaded_model.model.cpu()
-            current_loaded_model.model_patches_to("cpu")
+
+        current_loaded_model.model.to(unet_offload_device())
+        current_loaded_model.model_patches_to(unet_offload_device())
         current_loaded_model.unpatch_model()
         current_loaded_model = None
 
diff --git a/comfy/sd.py b/comfy/sd.py
index 52d016b1..542f704a 100644
--- a/comfy/sd.py
+++ b/comfy/sd.py
@@ -1142,6 +1142,7 @@ def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, o
         clipvision = clip_vision.load_clipvision_from_sd(sd, model_config.clip_vision_prefix, True)
 
     model = model_config.get_model(sd)
+    model = model.to(model_management.unet_offload_device())
     model.load_model_weights(sd, "model.diffusion_model.")
 
     if output_vae:
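
Net effect of the patch: the choice of where a UNet lives when it is not actively in use is centralized in unet_offload_device() instead of being special-cased inside unload_model(), and load_checkpoint_guess_config() now places freshly loaded weights on that same device. A minimal, self-contained sketch of the selection logic, assuming a simplified stand-in for the VRAMState enum from comfy/model_management.py (the real enum has more states, and the real function reads module globals rather than taking parameters):

    import torch
    from enum import Enum

    class VRAMState(Enum):
        # Illustrative subset; the real module defines additional states.
        NORMAL_VRAM = 0
        HIGH_VRAM = 1
        SHARED = 2

    def unet_offload_device(vram_state: VRAMState, compute_device: torch.device) -> torch.device:
        # HIGH_VRAM and SHARED keep the model on the compute device;
        # every other state offloads it to system RAM.
        if vram_state in (VRAMState.HIGH_VRAM, VRAMState.SHARED):
            return compute_device
        return torch.device("cpu")

    # In HIGH_VRAM mode the model stays on the GPU; otherwise it moves to the CPU.
    assert unet_offload_device(VRAMState.HIGH_VRAM, torch.device("cuda")) == torch.device("cuda")
    assert unet_offload_device(VRAMState.NORMAL_VRAM, torch.device("cuda")) == torch.device("cpu")

Routing every offload through one helper also changes behavior slightly versus the old code: the SHARED state now keeps models on the torch device too, whereas previously only HIGH_VRAM was exempt from being moved to the CPU.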