mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2025-12-16 17:42:58 +08:00
Turn off cuda malloc by default when --fast autotune is turned on. (#10393)
This commit is contained in:
parent
9da397ea2f
commit
5b80addafd
@@ -371,6 +371,9 @@ try:
 except:
     pass
 
+if torch.cuda.is_available() and torch.backends.cudnn.is_available() and PerformanceFeature.AutoTune in args.fast:
+    torch.backends.cudnn.benchmark = True
+
 try:
     if torch_version_numeric >= (2, 5):
         torch.backends.cuda.allow_fp16_bf16_reduction_math_sdp(True)
@@ -67,9 +67,6 @@ except:
 
 cast_to = comfy.model_management.cast_to #TODO: remove once no more references
 
-if torch.cuda.is_available() and torch.backends.cudnn.is_available() and PerformanceFeature.AutoTune in args.fast:
-    torch.backends.cudnn.benchmark = True
-
 def cast_to_input(weight, input, non_blocking=False, copy=True):
     return comfy.model_management.cast_to(weight, input.dtype, input.device, non_blocking=non_blocking, copy=copy)
 
@@ -1,6 +1,6 @@
 import os
 import importlib.util
-from comfy.cli_args import args
+from comfy.cli_args import args, PerformanceFeature
 import subprocess
 
 #Can't use pytorch to get the GPU names because the cuda malloc has to be set before the first import.
@@ -76,6 +76,7 @@ if not args.cuda_malloc:
                 version = module.__version__
 
         if int(version[0]) >= 2 and "+cu" in version: # enable by default for torch version 2.0 and up only on cuda torch
-            args.cuda_malloc = cuda_malloc_supported()
+            if PerformanceFeature.AutoTune not in args.fast: # Autotune has issues with cuda malloc
+                args.cuda_malloc = cuda_malloc_supported()
     except:
         pass
Loading…
Reference in New Issue
Block a user