From 37713e3b0acfc576f4eafc0b47582374ab5987dc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=97=8D+85CD?= <50108258+kwaa@users.noreply.github.com>
Date: Wed, 5 Apr 2023 21:22:14 +0800
Subject: [PATCH 1/5] Add basic XPU device support

closed #387
---
 comfy/model_management.py | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/comfy/model_management.py b/comfy/model_management.py
index 052dfb77..f0b8be55 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -5,6 +5,7 @@ LOW_VRAM = 2
 NORMAL_VRAM = 3
 HIGH_VRAM = 4
 MPS = 5
+XPU = 6
 
 accelerate_enabled = False
 vram_state = NORMAL_VRAM
@@ -85,10 +86,17 @@ try:
 except:
     pass
 
+try:
+    import intel_extension_for_pytorch
+    if torch.xpu.is_available():
+        vram_state = XPU
+except:
+    pass
+
 if forced_cpu:
     vram_state = CPU
 
-print("Set vram state to:", ["CPU", "NO VRAM", "LOW VRAM", "NORMAL VRAM", "HIGH VRAM", "MPS"][vram_state])
+print("Set vram state to:", ["CPU", "NO VRAM", "LOW VRAM", "NORMAL VRAM", "HIGH VRAM", "MPS", "XPU"][vram_state])
 
 current_loaded_model = None
 
@@ -141,6 +149,9 @@ def load_model_gpu(model):
         mps_device = torch.device("mps")
         real_model.to(mps_device)
         pass
+    elif vram_state == XPU:
+        real_model.to("xpu")
+        pass
     elif vram_state == NORMAL_VRAM or vram_state == HIGH_VRAM:
         model_accelerated = False
         real_model.cuda()
@@ -189,6 +200,8 @@ def unload_if_low_vram(model):
 def get_torch_device():
     if vram_state == MPS:
         return torch.device("mps")
+    if vram_state == XPU:
+        return torch.device("xpu")
     if vram_state == CPU:
         return torch.device("cpu")
     else:
@@ -228,6 +241,9 @@ def get_free_memory(dev=None, torch_free_too=False):
     if hasattr(dev, 'type') and (dev.type == 'cpu' or dev.type == 'mps'):
         mem_free_total = psutil.virtual_memory().available
         mem_free_torch = mem_free_total
+    elif hasattr(dev, 'type') and (dev.type == 'xpu'):
+        mem_free_total = torch.xpu.get_device_properties(dev).total_memory - torch.xpu.memory_allocated(dev)
+        mem_free_torch = mem_free_total
     else:
         stats = torch.cuda.memory_stats(dev)
         mem_active = stats['active_bytes.all.current']
@@ -258,8 +274,12 @@ def mps_mode():
     global vram_state
     return vram_state == MPS
 
+def xpu_mode():
+    global vram_state
+    return vram_state == XPU
+
 def should_use_fp16():
-    if cpu_mode() or mps_mode():
+    if cpu_mode() or mps_mode() or xpu_mode():
        return False #TODO ?
 
     if torch.cuda.is_bf16_supported():
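Aside (not part of the patch series): patch 1 follows a probe-then-select pattern, so it can be exercised on its own. The sketch below is a simplified illustration only; it collapses `vram_state` into a boolean, and `pick_device()` is a hypothetical helper, not the repository's `get_torch_device()`. It assumes `torch` is installed and treats `intel_extension_for_pytorch` as optional.

```python
import torch

xpu_available = False
try:
    # The patch imports IPEX before touching torch.xpu; the import fails
    # on machines without Intel's extension installed.
    import intel_extension_for_pytorch as ipex  # noqa: F401
    xpu_available = torch.xpu.is_available()
except Exception:
    pass

def pick_device() -> torch.device:
    # Roughly the same preference order as the patch: XPU first, then CUDA, then CPU.
    if xpu_available:
        return torch.device("xpu")
    if torch.cuda.is_available():
        return torch.device("cuda")
    return torch.device("cpu")

print("selected device:", pick_device())
```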
From 1ced2bdd2da9a13caf72d7bff36d7f645f443fc7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=97=8D+85CD?= <50108258+kwaa@users.noreply.github.com>
Date: Wed, 5 Apr 2023 21:25:37 +0800
Subject: [PATCH 2/5] Specify safetensors version to avoid upstream errors

https://github.com/huggingface/safetensors/issues/142
---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 3b4040a2..0527b31d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,7 +4,7 @@ torchsde
 einops
 open-clip-torch
 transformers>=4.25.1
-safetensors
+safetensors>=0.3.0
 pytorch_lightning
 aiohttp
 accelerate

From 84b9c0ac2ff49b5b18b8e7804f8fe42a379a0787 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=97=8D+85CD?= <50108258+kwaa@users.noreply.github.com>
Date: Thu, 6 Apr 2023 12:27:22 +0800
Subject: [PATCH 3/5] Import intel_extension_for_pytorch as ipex

---
 comfy/model_management.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/comfy/model_management.py b/comfy/model_management.py
index f0b8be55..379cc18d 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -87,7 +87,7 @@ except:
     pass
 
 try:
-    import intel_extension_for_pytorch
+    import intel_extension_for_pytorch as ipex
     if torch.xpu.is_available():
         vram_state = XPU
 except:
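Aside (not part of the patch series): patch 2 only pins a floor in requirements.txt. If you wanted to confirm at runtime that the installed wheel actually satisfies `safetensors>=0.3.0`, a minimal check could look like the sketch below; it assumes the `packaging` distribution is importable, and `REQUIRED_SAFETENSORS` is a made-up name for this example.

```python
from importlib.metadata import version
from packaging.version import Version

# Hypothetical constant mirroring the requirements.txt pin.
REQUIRED_SAFETENSORS = Version("0.3.0")

installed = Version(version("safetensors"))
if installed < REQUIRED_SAFETENSORS:
    raise RuntimeError(
        f"safetensors {installed} installed, but >= {REQUIRED_SAFETENSORS} is required"
    )
print(f"safetensors {installed} satisfies the pin")
```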
From 7cb924f68469cd2481b2313f8e5fc02587279bf3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=97=8D+85CD?= <50108258+kwaa@users.noreply.github.com>
Date: Thu, 6 Apr 2023 14:24:47 +0800
Subject: [PATCH 4/5] Use separate variables instead of `vram_state`

---
 comfy/model_management.py | 70 +++++++++++++++++++++------------------
 1 file changed, 37 insertions(+), 33 deletions(-)

diff --git a/comfy/model_management.py b/comfy/model_management.py
index 379cc18d..a8416774 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -5,9 +5,9 @@ LOW_VRAM = 2
 NORMAL_VRAM = 3
 HIGH_VRAM = 4
 MPS = 5
-XPU = 6
 
 accelerate_enabled = False
+xpu_available = False
 vram_state = NORMAL_VRAM
 
 total_vram = 0
@@ -22,7 +22,12 @@ set_vram_to = NORMAL_VRAM
 
 try:
     import torch
-    total_vram = torch.cuda.mem_get_info(torch.cuda.current_device())[1] / (1024 * 1024)
+    import intel_extension_for_pytorch as ipex
+    if torch.xpu.is_available():
+        xpu_available = True
+        total_vram = torch.xpu.get_device_properties(torch.xpu.current_device()).total_memory / (1024 * 1024)
+    else:
+        total_vram = torch.cuda.mem_get_info(torch.cuda.current_device())[1] / (1024 * 1024)
     total_ram = psutil.virtual_memory().total / (1024 * 1024)
     forced_normal_vram = "--normalvram" in sys.argv
     if not forced_normal_vram and not forced_cpu:
@@ -86,17 +91,10 @@ try:
 except:
     pass
 
-try:
-    import intel_extension_for_pytorch as ipex
-    if torch.xpu.is_available():
-        vram_state = XPU
-except:
-    pass
-
 if forced_cpu:
     vram_state = CPU
 
-print("Set vram state to:", ["CPU", "NO VRAM", "LOW VRAM", "NORMAL VRAM", "HIGH VRAM", "MPS", "XPU"][vram_state])
+print("Set vram state to:", ["CPU", "NO VRAM", "LOW VRAM", "NORMAL VRAM", "HIGH VRAM", "MPS"][vram_state])
 
 current_loaded_model = None
 
@@ -133,6 +131,7 @@ def load_model_gpu(model):
     global current_loaded_model
     global vram_state
     global model_accelerated
+    global xpu_available
 
     if model is current_loaded_model:
         return
@@ -149,19 +148,19 @@ def load_model_gpu(model):
         mps_device = torch.device("mps")
         real_model.to(mps_device)
         pass
-    elif vram_state == XPU:
-        real_model.to("xpu")
-        pass
     elif vram_state == NORMAL_VRAM or vram_state == HIGH_VRAM:
         model_accelerated = False
-        real_model.cuda()
+        if xpu_available:
+            real_model.to("xpu")
+        else:
+            real_model.cuda()
     else:
         if vram_state == NO_VRAM:
             device_map = accelerate.infer_auto_device_map(real_model, max_memory={0: "256MiB", "cpu": "16GiB"})
         elif vram_state == LOW_VRAM:
             device_map = accelerate.infer_auto_device_map(real_model, max_memory={0: "{}MiB".format(total_vram_available_mb), "cpu": "16GiB"})
 
-        accelerate.dispatch_model(real_model, device_map=device_map, main_device="cuda")
+        accelerate.dispatch_model(real_model, device_map=device_map, main_device="xpu" if xpu_available else "cuda")
         model_accelerated = True
 
     return current_loaded_model
@@ -187,8 +186,12 @@ def load_controlnet_gpu(models):
 
 def load_if_low_vram(model):
     global vram_state
+    global xpu_available
     if vram_state == LOW_VRAM or vram_state == NO_VRAM:
-        return model.cuda()
+        if xpu_available:
+            return model.to("xpu")
+        else:
+            return model.cuda()
     return model
 
 def unload_if_low_vram(model):
@@ -198,14 +201,16 @@ def unload_if_low_vram(model):
     return model
 
 def get_torch_device():
+    global xpu_available
     if vram_state == MPS:
         return torch.device("mps")
-    if vram_state == XPU:
-        return torch.device("xpu")
     if vram_state == CPU:
         return torch.device("cpu")
     else:
-        return torch.cuda.current_device()
+        if xpu_available:
+            return torch.device("xpu")
+        else:
+            return torch.cuda.current_device()
 
 def get_autocast_device(dev):
     if hasattr(dev, 'type'):
@@ -235,22 +240,24 @@ def pytorch_attention_enabled():
     return ENABLE_PYTORCH_ATTENTION
 
 def get_free_memory(dev=None, torch_free_too=False):
+    global xpu_available
     if dev is None:
         dev = get_torch_device()
 
     if hasattr(dev, 'type') and (dev.type == 'cpu' or dev.type == 'mps'):
         mem_free_total = psutil.virtual_memory().available
         mem_free_torch = mem_free_total
-    elif hasattr(dev, 'type') and (dev.type == 'xpu'):
-        mem_free_total = torch.xpu.get_device_properties(dev).total_memory - torch.xpu.memory_allocated(dev)
-        mem_free_torch = mem_free_total
     else:
-        stats = torch.cuda.memory_stats(dev)
-        mem_active = stats['active_bytes.all.current']
-        mem_reserved = stats['reserved_bytes.all.current']
-        mem_free_cuda, _ = torch.cuda.mem_get_info(dev)
-        mem_free_torch = mem_reserved - mem_active
-        mem_free_total = mem_free_cuda + mem_free_torch
+        if xpu_available:
+            mem_free_total = torch.xpu.get_device_properties(dev).total_memory - torch.xpu.memory_allocated(dev)
+            mem_free_torch = mem_free_total
+        else:
+            stats = torch.cuda.memory_stats(dev)
+            mem_active = stats['active_bytes.all.current']
+            mem_reserved = stats['reserved_bytes.all.current']
+            mem_free_cuda, _ = torch.cuda.mem_get_info(dev)
+            mem_free_torch = mem_reserved - mem_active
+            mem_free_total = mem_free_cuda + mem_free_torch
 
     if torch_free_too:
         return (mem_free_total, mem_free_torch)
@@ -274,12 +281,9 @@ def mps_mode():
     global vram_state
     return vram_state == MPS
 
-def xpu_mode():
-    global vram_state
-    return vram_state == XPU
-
 def should_use_fp16():
-    if cpu_mode() or mps_mode() or xpu_mode():
+    global xpu_available
+    if cpu_mode() or mps_mode() or xpu_available:
         return False #TODO ?
 
     if torch.cuda.is_bf16_supported():
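Aside (not part of the patch series): after patch 4 the XPU path is no longer a `vram_state` value but a module-level `xpu_available` flag checked at each device-touching call site. The sketch below isolates the free-memory branch of `get_free_memory()` as a hypothetical standalone helper; the XPU figure is approximated as total minus allocated, exactly as the patch does. It assumes `torch` is installed and IPEX is optional.

```python
import torch

xpu_available = False
try:
    import intel_extension_for_pytorch as ipex  # noqa: F401
    xpu_available = torch.xpu.is_available()
except Exception:
    pass

def free_device_memory(dev) -> int:
    """Bytes believed free on the accelerator, following the patch's two branches."""
    if xpu_available:
        # The patch estimates free XPU memory as total - currently allocated.
        props = torch.xpu.get_device_properties(dev)
        return props.total_memory - torch.xpu.memory_allocated(dev)
    # CUDA branch: driver-reported free memory plus reserved-but-inactive torch memory.
    stats = torch.cuda.memory_stats(dev)
    mem_free_cuda, _ = torch.cuda.mem_get_info(dev)
    mem_free_torch = stats['reserved_bytes.all.current'] - stats['active_bytes.all.current']
    return mem_free_cuda + mem_free_torch
```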
From 3e2608e12b312fd5d2396d4146d992cd4f8b9ab4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=97=8D+85CD?= <50108258+kwaa@users.noreply.github.com>
Date: Thu, 6 Apr 2023 15:44:05 +0800
Subject: [PATCH 5/5] Fix auto lowvram detection on CUDA

---
 comfy/model_management.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/comfy/model_management.py b/comfy/model_management.py
index a8416774..b0123b5f 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -22,11 +22,12 @@ set_vram_to = NORMAL_VRAM
 
 try:
     import torch
-    import intel_extension_for_pytorch as ipex
-    if torch.xpu.is_available():
-        xpu_available = True
-        total_vram = torch.xpu.get_device_properties(torch.xpu.current_device()).total_memory / (1024 * 1024)
-    else:
+    try:
+        import intel_extension_for_pytorch as ipex
+        if torch.xpu.is_available():
+            xpu_available = True
+            total_vram = torch.xpu.get_device_properties(torch.xpu.current_device()).total_memory / (1024 * 1024)
+    except:
        total_vram = torch.cuda.mem_get_info(torch.cuda.current_device())[1] / (1024 * 1024)
     total_ram = psutil.virtual_memory().total / (1024 * 1024)
     forced_normal_vram = "--normalvram" in sys.argv
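Aside (not part of the patch series): patch 5 nests the IPEX probe inside the existing torch try-block, so a missing or broken `intel_extension_for_pytorch` falls through to the CUDA VRAM query instead of skipping the whole detection block. A self-contained rendering of that control flow, using the same torch/psutil calls as the patch (bare excepts widened to `except Exception` for this sketch only):

```python
import psutil

xpu_available = False
total_vram = 0
total_ram = 0

try:
    import torch
    try:
        import intel_extension_for_pytorch as ipex  # noqa: F401
        if torch.xpu.is_available():
            xpu_available = True
            total_vram = torch.xpu.get_device_properties(torch.xpu.current_device()).total_memory / (1024 * 1024)
    except Exception:
        # No usable XPU stack: fall back to the CUDA query, as the patch does.
        total_vram = torch.cuda.mem_get_info(torch.cuda.current_device())[1] / (1024 * 1024)
    total_ram = psutil.virtual_memory().total / (1024 * 1024)
except Exception:
    pass  # torch itself is missing; keep the defaults

print(f"xpu_available={xpu_available}, total_vram={total_vram:.0f} MiB, total_ram={total_ram:.0f} MiB")
```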