Refactor AMD performance settings and enable SDP

2026-02-25 21:37:32 +08:00 · 2025-12-22 02:22:47 +03:00 · 2025-12-22 02:22:47 +03:00 · 5d82179084
commit 5d82179084
parent ab7cf25eea
1 changed file with 7 additions and 5 deletions
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -26,7 +26,7 @@ import importlib
 import platform
 import weakref
 import gc
-import comfy.zluda
+import os

 class VRAMState(Enum):
    DISABLED = 0    #No vram present: no need to move models to vram
@@ -334,13 +334,15 @@ except:
 SUPPORT_FP8_OPS = args.supports_fp8_compute

 AMD_RDNA2_AND_OLDER_ARCH = ["gfx1030", "gfx1031", "gfx1010", "gfx1011", "gfx1012", "gfx906", "gfx900", "gfx803"]
+AMD_ENABLE_MIOPEN_ENV = 'COMFYUI_ENABLE_MIOPEN'

 try:
    if is_amd():
        arch = torch.cuda.get_device_properties(get_torch_device()).gcnArchName
        if not (any((a in arch) for a in AMD_RDNA2_AND_OLDER_ARCH)):
-            torch.backends.cudnn.enabled = False  # Seems to improve things a lot on AMD
-            logging.info("Set: torch.backends.cudnn.enabled = False for better AMD performance.")
+            if os.getenv(AMD_ENABLE_MIOPEN_ENV) != '1':
+                torch.backends.cudnn.enabled = False  # Seems to improve things a lot on AMD
+                logging.info("Set: torch.backends.cudnn.enabled = False for better AMD performance.")

        try:
            rocm_version = tuple(map(int, str(torch.version.hip).split(".")[:2]))
@@ -367,8 +369,8 @@ except:

 if ENABLE_PYTORCH_ATTENTION:
    torch.backends.cuda.enable_math_sdp(True)
-    # torch.backends.cuda.enable_flash_sdp(True)
-    # torch.backends.cuda.enable_mem_efficient_sdp(True)
+    torch.backends.cuda.enable_flash_sdp(True)
+    torch.backends.cuda.enable_mem_efficient_sdp(True)


 PRIORITIZE_FP16 = False  # TODO: remove and replace with something that shows exactly which dtype is faster than the other