Mirror of https://github.com/comfyanonymous/ComfyUI.git
Re-enable MIOpen for AMD cards

Default MIOPEN_FIND_MODE=FAST and default PYTORCH_MIOPEN_SUGGEST_NHWC=0.
commit 67ca7fb4f6 (parent 5d9ad0c6bf)
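These defaults are only applied when the variables are not already set, so values exported by the user take precedence. A minimal, illustrative sketch of overriding them before ComfyUI initializes PyTorch/MIOpen; the specific override values NORMAL and 1 are assumptions, not part of this commit:

    import os

    # Illustrative overrides; set these before ComfyUI/PyTorch touches MIOpen.
    # The commit's guards use "os.getenv(...) is None", so any pre-existing
    # value, including an empty string, is left untouched.
    os.environ["MIOPEN_FIND_MODE"] = "NORMAL"        # assumed example: slower, exhaustive kernel search
    os.environ["PYTORCH_MIOPEN_SUGGEST_NHWC"] = "1"  # assumed example: re-enable NHWC layout suggestions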
@@ -26,6 +26,7 @@ import importlib
 import platform
 import weakref
 import gc
+import os
 
 class VRAMState(Enum):
     DISABLED = 0 #No vram present: no need to move models to vram
@@ -337,10 +338,6 @@ AMD_RDNA2_AND_OLDER_ARCH = ["gfx1030", "gfx1031", "gfx1010", "gfx1011", "gfx1012
 try:
     if is_amd():
         arch = torch.cuda.get_device_properties(get_torch_device()).gcnArchName
-        if not (any((a in arch) for a in AMD_RDNA2_AND_OLDER_ARCH)):
-            torch.backends.cudnn.enabled = False # Seems to improve things a lot on AMD
-            logging.info("Set: torch.backends.cudnn.enabled = False for better AMD performance.")
-
         try:
             rocm_version = tuple(map(int, str(torch.version.hip).split(".")[:2]))
         except:
@@ -348,6 +345,16 @@ try:
         logging.info("AMD arch: {}".format(arch))
         logging.info("ROCm version: {}".format(rocm_version))
 
+        if os.getenv('MIOPEN_FIND_MODE') is None:
+            # MIOpen default search mode can cause significant slowdowns without much benefit
+            os.environ['MIOPEN_FIND_MODE'] = "FAST"
+            logging.info("Set: MIOPEN_FIND_MODE=FAST for better AMD performance, change by setting MIOPEN_FIND_MODE.")
+
+        if os.getenv('PYTORCH_MIOPEN_SUGGEST_NHWC') is None:
+            # See https://github.com/ROCm/TheRock/issues/2485#issuecomment-3666986174
+            os.environ['PYTORCH_MIOPEN_SUGGEST_NHWC'] = "0"
+            logging.info("Set: PYTORCH_MIOPEN_SUGGEST_NHWC=0 for better AMD performance, change by setting PYTORCH_MIOPEN_SUGGEST_NHWC.")
+
         if args.use_split_cross_attention == False and args.use_quad_cross_attention == False:
             if importlib.util.find_spec('triton') is not None: # AMD efficient attention implementation depends on triton. TODO: better way of detecting if it's compiled in or not.
                 if torch_version_numeric >= (2, 7): # works on 2.6 but doesn't actually seem to improve much
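The third hunk follows a simple set-a-default-only-if-unset pattern. A standalone sketch of that pattern, with the repeated check factored into a helper (the helper name set_env_default is mine, not the commit's):

    import logging
    import os

    def set_env_default(name, value, reason):
        # Apply the default only when the variable is completely unset; any value
        # the user exported, even an empty string, is respected as-is.
        if os.getenv(name) is None:
            os.environ[name] = value
            logging.info("Set: %s=%s (%s), change by setting %s.", name, value, reason, name)

    # Mirrors the commit's two defaults:
    set_env_default("MIOPEN_FIND_MODE", "FAST", "avoid slow MIOpen kernel search")
    set_env_default("PYTORCH_MIOPEN_SUGGEST_NHWC", "0", "avoid NHWC-related slowdowns")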
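Since the second hunk drops the automatic torch.backends.cudnn.enabled = False on newer AMD architectures, the MIOpen-backed convolution path is active again (hence the commit title). Anyone who preferred the old behaviour can still opt out manually; a one-line sketch, to be run before any convolutions execute:

    import torch

    # Manual opt-out, equivalent to the line this commit removes. On ROCm builds,
    # the cudnn backend flag gates MIOpen, so this disables MIOpen-backed convolutions.
    torch.backends.cudnn.enabled = False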