From 67ca7fb4f6a88ccc1c6cc44ef3587be717946c73 Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Wed, 17 Dec 2025 20:38:29 +0000 Subject: [PATCH] Re-enable MIOpen for amd cards Default MIOPEN_FIND_MODE=FAST Default PYTORCH_MIOPEN_SUGGEST_NHWC=0 --- comfy/model_management.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/comfy/model_management.py b/comfy/model_management.py index 40717b1e4..9bb429ce7 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -26,6 +26,7 @@ import importlib import platform import weakref import gc +import os class VRAMState(Enum): DISABLED = 0 #No vram present: no need to move models to vram @@ -337,10 +338,6 @@ AMD_RDNA2_AND_OLDER_ARCH = ["gfx1030", "gfx1031", "gfx1010", "gfx1011", "gfx1012 try: if is_amd(): arch = torch.cuda.get_device_properties(get_torch_device()).gcnArchName - if not (any((a in arch) for a in AMD_RDNA2_AND_OLDER_ARCH)): - torch.backends.cudnn.enabled = False # Seems to improve things a lot on AMD - logging.info("Set: torch.backends.cudnn.enabled = False for better AMD performance.") - try: rocm_version = tuple(map(int, str(torch.version.hip).split(".")[:2])) except: @@ -348,6 +345,16 @@ try: logging.info("AMD arch: {}".format(arch)) logging.info("ROCm version: {}".format(rocm_version)) + + if os.getenv('MIOPEN_FIND_MODE') is None: + # MIOpen default search mode can cause significant slowdowns without much benefit + os.environ['MIOPEN_FIND_MODE'] = "FAST" + logging.info("Set: MIOPEN_FIND_MODE=FAST for better AMD performance, change by setting MIOPEN_FIND_MODE.") + if os.getenv('PYTORCH_MIOPEN_SUGGEST_NHWC') is None: + # See https://github.com/ROCm/TheRock/issues/2485#issuecomment-3666986174 + os.environ['PYTORCH_MIOPEN_SUGGEST_NHWC'] = "0" + logging.info("Set: PYTORCH_MIOPEN_SUGGEST_NHWC=0 for better AMD performance, change by setting PYTORCH_MIOPEN_SUGGEST_NHWC.") + if args.use_split_cross_attention == False and args.use_quad_cross_attention == False: if importlib.util.find_spec('triton') is not None: # AMD efficient attention implementation depends on triton. TODO: better way of detecting if it's compiled in or not. if torch_version_numeric >= (2, 7): # works on 2.6 but doesn't actually seem to improve much