Mirror of https://github.com/comfyanonymous/ComfyUI.git (synced 2026-02-14 23:42:35 +08:00)
Add env TORCH_AMD_CUDNN_ENABLED
This commit is contained in:
parent c4a6b389de
commit 58db8864a6
comfy/model_management.py
@@ -26,6 +26,7 @@ import importlib
 import platform
 import weakref
 import gc
+import os
 
 class VRAMState(Enum):
     DISABLED = 0 #No vram present: no need to move models to vram
@@ -338,8 +339,11 @@ try:
     if is_amd():
         arch = torch.cuda.get_device_properties(get_torch_device()).gcnArchName
         if not (any((a in arch) for a in AMD_RDNA2_AND_OLDER_ARCH)):
-            torch.backends.cudnn.enabled = False # Seems to improve things a lot on AMD
-            logging.info("Set: torch.backends.cudnn.enabled = False for better AMD performance.")
+            torch.backends.cudnn.enabled = os.environ.get("TORCH_AMD_CUDNN_ENABLED", "0").strip().lower() not in {
+                "0", "off", "false", "disable", "disabled", "no"}
+            if not torch.backends.cudnn.enabled:
+                logging.info(
+                    "ComfyUI has set torch.backends.cudnn.enabled to False for better AMD performance. Set environment var TORCH_AMD_CUDNN_ENABLED=1 to enable it again.")
 
         try:
             rocm_version = tuple(map(int, str(torch.version.hip).split(".")[:2]))
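
For reference, the new toggle treats TORCH_AMD_CUDNN_ENABLED as a case-insensitive boolean and defaults to "0" when unset, so cuDNN stays disabled on AMD GPUs newer than RDNA2 unless the user opts in. A minimal standalone sketch of the same parsing (the helper name is illustrative, not part of the commit):

    import os

    _FALSY = {"0", "off", "false", "disable", "disabled", "no"}

    def amd_cudnn_enabled_from_env():
        # Unset -> "0" -> disabled; "1", "on", "true", etc. leave cuDNN enabled.
        return os.environ.get("TORCH_AMD_CUDNN_ENABLED", "0").strip().lower() not in _FALSY

For example, launching ComfyUI with TORCH_AMD_CUDNN_ENABLED=1 now keeps torch.backends.cudnn.enabled at True on those GPUs.
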
@@ -1082,20 +1086,8 @@ def cast_to_device(tensor, device, dtype, copy=False):
     non_blocking = device_supports_non_blocking(device)
     return cast_to(tensor, dtype=dtype, device=device, non_blocking=non_blocking, copy=copy)
 
-
-PINNED_MEMORY = {}
-TOTAL_PINNED_MEMORY = 0
-if PerformanceFeature.PinnedMem in args.fast:
-    if WINDOWS:
-        MAX_PINNED_MEMORY = get_total_memory(torch.device("cpu")) * 0.45 # Windows limit is apparently 50%
-    else:
-        MAX_PINNED_MEMORY = get_total_memory(torch.device("cpu")) * 0.95
-else:
-    MAX_PINNED_MEMORY = -1
-
 def pin_memory(tensor):
-    global TOTAL_PINNED_MEMORY
-    if MAX_PINNED_MEMORY <= 0:
+    if PerformanceFeature.PinnedMem not in args.fast:
         return False
 
     if not is_nvidia():
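
Background on what the PinnedMem feature buys: page-locked (pinned) host memory is what allows non_blocking host-to-device copies to be truly asynchronous; with ordinary pageable memory the copy silently degrades to a synchronous one. A minimal illustration using PyTorch's stock Tensor.pin_memory(), which allocates a fresh pinned buffer and copies into it, unlike the in-place registration this file performs:

    import torch

    src = torch.randn(4096, 4096).pin_memory()  # copy into page-locked host memory
    dst = src.to("cuda", non_blocking=True)     # true async copy, can overlap compute
    torch.cuda.synchronize()                    # wait before reading dst
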
@@ -1104,21 +1096,13 @@ def pin_memory(tensor):
     if not is_device_cpu(tensor.device):
         return False
 
-    size = tensor.numel() * tensor.element_size()
-    if (TOTAL_PINNED_MEMORY + size) > MAX_PINNED_MEMORY:
-        return False
-
-    ptr = tensor.data_ptr()
-    if torch.cuda.cudart().cudaHostRegister(ptr, size, 1) == 0:
-        PINNED_MEMORY[ptr] = size
-        TOTAL_PINNED_MEMORY += size
+    if torch.cuda.cudart().cudaHostRegister(tensor.data_ptr(), tensor.numel() * tensor.element_size(), 1) == 0:
         return True
 
     return False
 
 def unpin_memory(tensor):
-    global TOTAL_PINNED_MEMORY
-    if MAX_PINNED_MEMORY <= 0:
+    if PerformanceFeature.PinnedMem not in args.fast:
         return False
 
     if not is_nvidia():
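
The simplified pin_memory registers the tensor's existing storage directly: cudaHostRegister(ptr, nbytes, 1) page-locks the allocation in place (flag 1 corresponds to cudaHostRegisterPortable) and returns 0, i.e. cudaSuccess, on success, which is what the == 0 check tests. A hedged sketch of the same raw calls outside these helpers (variable names are illustrative):

    import torch

    buf = torch.empty(1024, 1024)  # ordinary pageable CPU tensor
    nbytes = buf.numel() * buf.element_size()

    # Page-lock buf's existing allocation in place; no data is copied.
    if torch.cuda.cudart().cudaHostRegister(buf.data_ptr(), nbytes, 1) == 0:
        gpu = buf.to("cuda", non_blocking=True)  # now genuinely asynchronous
        torch.cuda.synchronize()
        torch.cuda.cudart().cudaHostUnregister(buf.data_ptr())  # undo before freeing buf

unpin_memory (next hunk) is the matching cleanup path via cudaHostUnregister, which no longer needs the PINNED_MEMORY bookkeeping once the byte budget is gone.
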
@@ -1127,11 +1111,7 @@ def unpin_memory(tensor):
     if not is_device_cpu(tensor.device):
         return False
 
-    ptr = tensor.data_ptr()
-    if torch.cuda.cudart().cudaHostUnregister(ptr) == 0:
-        TOTAL_PINNED_MEMORY -= PINNED_MEMORY.pop(ptr)
-        if len(PINNED_MEMORY) == 0:
-            TOTAL_PINNED_MEMORY = 0
+    if torch.cuda.cudart().cudaHostUnregister(tensor.data_ptr()) == 0:
         return True
 
     return False