Allow FP16 accumulation with --fast (#6453)

Currently this only takes effect on PyTorch nightly releases (>= 20250208).
catboxanon 2025-02-08 17:00:56 -05:00 committed by GitHub
parent af93c8d1ee
commit 43a74c0de1

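The attribute being set here is only present on very recent PyTorch builds, which is why the change below wraps the assignment in a try/except. As a rough illustration (the helper name is hypothetical, not part of the commit), the same availability check could be probed explicitly:

import torch

def fp16_accumulation_available() -> bool:
    # Defensive probe, mirroring the try/except used in the commit below:
    # the attribute only exists on PyTorch nightly builds from 2025-02-08 onward.
    try:
        return hasattr(torch.backends.cuda.matmul, "allow_fp16_accumulation")
    except Exception:
        return False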

@@ -241,6 +241,12 @@ if ENABLE_PYTORCH_ATTENTION:
     torch.backends.cuda.enable_flash_sdp(True)
     torch.backends.cuda.enable_mem_efficient_sdp(True)
 
+try:
+    if is_nvidia() and args.fast:
+        torch.backends.cuda.matmul.allow_fp16_accumulation = True
+except:
+    pass
+
 try:
     if int(torch_version[0]) == 2 and int(torch_version[2]) >= 5:
         torch.backends.cuda.allow_fp16_bf16_reduction_math_sdp(True)
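For reference, a minimal sketch of what the flag changes, assuming an NVIDIA GPU and a PyTorch nightly (>= 20250208) that exposes it: with the flag on, FP16 matmuls may accumulate in FP16 instead of FP32, which is faster but slightly less precise.

import torch

# Assumes a CUDA-capable NVIDIA GPU and a PyTorch build that exposes the flag.
a = torch.randn(4096, 4096, device="cuda", dtype=torch.float16)
b = torch.randn(4096, 4096, device="cuda", dtype=torch.float16)

torch.backends.cuda.matmul.allow_fp16_accumulation = False
ref = a @ b  # default behaviour: accumulate in FP32

torch.backends.cuda.matmul.allow_fp16_accumulation = True
fast = a @ b  # FP16 accumulation: faster, reduced precision

# The two results can differ because of the lower accumulation precision.
print((ref - fast).abs().max().item())

Within ComfyUI this stays opt-in: the flag is only set when the server is launched with the existing --fast argument on an NVIDIA GPU, and the try/except leaves older PyTorch builds unchanged.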