diff --git a/comfy/model_management.py b/comfy/model_management.py index d82d5b8b0..7467391cd 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -371,6 +371,9 @@ try: except: pass +if torch.cuda.is_available() and torch.backends.cudnn.is_available() and PerformanceFeature.AutoTune in args.fast: + torch.backends.cudnn.benchmark = True + try: if torch_version_numeric >= (2, 5): torch.backends.cuda.allow_fp16_bf16_reduction_math_sdp(True) diff --git a/comfy/ops.py b/comfy/ops.py index 5feeb3571..967134f05 100644 --- a/comfy/ops.py +++ b/comfy/ops.py @@ -67,9 +67,6 @@ except: cast_to = comfy.model_management.cast_to #TODO: remove once no more references -if torch.cuda.is_available() and torch.backends.cudnn.is_available() and PerformanceFeature.AutoTune in args.fast: - torch.backends.cudnn.benchmark = True - def cast_to_input(weight, input, non_blocking=False, copy=True): return comfy.model_management.cast_to(weight, input.dtype, input.device, non_blocking=non_blocking, copy=copy) diff --git a/cuda_malloc.py b/cuda_malloc.py index c1d9ae3ca..6520d5123 100644 --- a/cuda_malloc.py +++ b/cuda_malloc.py @@ -1,6 +1,6 @@ import os import importlib.util -from comfy.cli_args import args +from comfy.cli_args import args, PerformanceFeature import subprocess #Can't use pytorch to get the GPU names because the cuda malloc has to be set before the first import. @@ -75,8 +75,9 @@ if not args.cuda_malloc: spec.loader.exec_module(module) version = module.__version__ - if int(version[0]) >= 2 and "+cu" in version: #enable by default for torch version 2.0 and up only on cuda torch - args.cuda_malloc = cuda_malloc_supported() + if int(version[0]) >= 2 and "+cu" in version: # enable by default for torch version 2.0 and up only on cuda torch + if PerformanceFeature.AutoTune not in args.fast: # Autotune has issues with cuda malloc + args.cuda_malloc = cuda_malloc_supported() except: pass