main: force cudnn.benchmark to false (#14390)

Some custom nodes try to set this true globally. It messes with dynamic
VRAM with one-off spikes that can OOM but this is also very high risk
for windows where such allocations might get serviced by shared memory
fallback.

Trump it.
This commit is contained in:
rattus 2026-06-11 03:54:32 +10:00 committed by GitHub
parent 039ed38ed1
commit 6d18f4adac
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 9 additions and 2 deletions

View File

@ -534,8 +534,10 @@ try:
except:
pass
if torch.cuda.is_available() and torch.backends.cudnn.is_available() and PerformanceFeature.AutoTune in args.fast:
torch.backends.cudnn.benchmark = True
def set_cudnn_benchmark():
if torch.cuda.is_available() and torch.backends.cudnn.is_available():
torch.backends.cudnn.benchmark = PerformanceFeature.AutoTune in args.fast
try:
if torch_version_numeric >= (2, 5):

View File

@ -490,6 +490,11 @@ def start_comfyui(asyncio_loop=None):
init_custom_nodes=(not args.disable_all_custom_nodes) or len(args.whitelist_custom_nodes) > 0,
init_api_nodes=not args.disable_api_nodes
))
# Re-apply Comfy's cuDNN benchmark policy after custom-node imports. Benchmark
# mode can request near-card-sized autotune workspaces, and some custom nodes set it at import time.
comfy.model_management.set_cudnn_benchmark()
hook_breaker_ac10a0.restore_functions()
cuda_malloc_warning()