From d965474aaae2f1b461e0925a7e1519b740393994 Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Thu, 1 Aug 2024 16:39:59 -0400
Subject: [PATCH] Make ComfyUI split batches a higher priority than weight
 offload.

---
 comfy/model_management.py | 10 +++++++---
 comfy/sampler_helpers.py  |  4 +++-
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/comfy/model_management.py b/comfy/model_management.py
index b4f32d64..da0b989a 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -379,11 +379,15 @@ def free_memory(memory_required, device, keep_loaded=[]):
             if mem_free_torch > mem_free_total * 0.25:
                 soft_empty_cache()
 
-def load_models_gpu(models, memory_required=0, force_patch_weights=False):
+def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimum_memory_required=None):
     global vram_state
 
     inference_memory = minimum_inference_memory()
     extra_mem = max(inference_memory, memory_required)
+    if minimum_memory_required is None:
+        minimum_memory_required = extra_mem
+    else:
+        minimum_memory_required = max(inference_memory, minimum_memory_required)
 
     models = set(models)
 
@@ -446,8 +450,8 @@ def load_models_gpu(models, memory_required=0, force_patch_weights=False):
         if lowvram_available and (vram_set_state == VRAMState.LOW_VRAM or vram_set_state == VRAMState.NORMAL_VRAM):
             model_size = loaded_model.model_memory_required(torch_dev)
             current_free_mem = get_free_memory(torch_dev)
-            lowvram_model_memory = int(max(64 * (1024 * 1024), (current_free_mem - extra_mem)))
-            if model_size <= (current_free_mem - inference_memory): #only switch to lowvram if really necessary
+            lowvram_model_memory = int(max(64 * (1024 * 1024), (current_free_mem - minimum_memory_required)))
+            if model_size <= lowvram_model_memory: #only switch to lowvram if really necessary
                 lowvram_model_memory = 0
 
         if vram_set_state == VRAMState.NO_VRAM:
diff --git a/comfy/sampler_helpers.py b/comfy/sampler_helpers.py
index a18abd9e..4a2ec123 100644
--- a/comfy/sampler_helpers.py
+++ b/comfy/sampler_helpers.py
@@ -61,7 +61,9 @@ def prepare_sampling(model, noise_shape, conds):
     device = model.load_device
     real_model = None
     models, inference_memory = get_additional_models(conds, model.model_dtype())
-    comfy.model_management.load_models_gpu([model] + models, model.memory_required([noise_shape[0] * 2] + list(noise_shape[1:])) + inference_memory)
+    memory_required = model.memory_required([noise_shape[0] * 2] + list(noise_shape[1:])) + inference_memory
+    minimum_memory_required = model.memory_required([noise_shape[0]] + list(noise_shape[1:])) + inference_memory
+    comfy.model_management.load_models_gpu([model] + models, memory_required=memory_required, minimum_memory_required=minimum_memory_required)
     real_model = model.model
 
     return real_model, conds, models
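
The sketch below is not part of the patch; it is a minimal, self-contained illustration of the decision the patched load_models_gpu now makes. The helper name choose_lowvram_budget and its standalone signature are hypothetical, but the variable names and arithmetic mirror the diff above: memory_required is the estimate for the full (cond + uncond) batch, minimum_memory_required is the estimate for a single batch, and weights are only offloaded when even the single-batch reservation cannot be met.

    # Illustrative sketch only (assumed helper, not ComfyUI API): how the new
    # minimum_memory_required parameter changes the lowvram decision.
    def choose_lowvram_budget(model_size, current_free_mem, inference_memory,
                              memory_required, minimum_memory_required=None):
        # Same fallback as the patch: without an explicit minimum, reserve the
        # full requested amount; otherwise never reserve less than the base
        # inference memory.
        extra_mem = max(inference_memory, memory_required)
        if minimum_memory_required is None:
            minimum_memory_required = extra_mem
        else:
            minimum_memory_required = max(inference_memory, minimum_memory_required)

        # Weights may occupy everything not reserved for the *minimum* batch,
        # so a large requested batch no longer forces weight offload by itself.
        lowvram_model_memory = int(max(64 * (1024 * 1024),
                                       current_free_mem - minimum_memory_required))
        if model_size <= lowvram_model_memory:  # only switch to lowvram if really necessary
            lowvram_model_memory = 0  # 0 means: keep all weights on the GPU
        return lowvram_model_memory

In prepare_sampling this plays out as in the second hunk: the caller passes both the 2x-batch estimate and the single-batch estimate, so when the 2x batch does not fit, the sampler can split the batch at run time instead of partially offloading model weights, which is the priority change named in the commit subject.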