From 19b466160c1cd43f707769adef6f8ed6e9fd50bf Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Thu, 16 Oct 2025 15:16:03 -0700 Subject: [PATCH] Workaround for nvidia issue where VAE uses 3x more memory on torch 2.9 (#10373) --- comfy/ops.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/comfy/ops.py b/comfy/ops.py index b2096b40e..893ceda98 100644 --- a/comfy/ops.py +++ b/comfy/ops.py @@ -52,6 +52,16 @@ try: except (ModuleNotFoundError, TypeError): logging.warning("Could not set sdpa backend priority.") +NVIDIA_MEMORY_CONV_BUG_WORKAROUND = False +try: + if comfy.model_management.is_nvidia(): + if torch.backends.cudnn.version() >= 91300 and comfy.model_management.torch_version_numeric >= (2, 9) and comfy.model_management.torch_version_numeric <= (2, 10): + #TODO: change upper bound version once it's fixed' + NVIDIA_MEMORY_CONV_BUG_WORKAROUND = True + logging.info("working around nvidia conv3d memory bug.") +except: + pass + cast_to = comfy.model_management.cast_to #TODO: remove once no more references if torch.cuda.is_available() and torch.backends.cudnn.is_available() and PerformanceFeature.AutoTune in args.fast: @@ -151,6 +161,15 @@ class disable_weight_init: def reset_parameters(self): return None + def _conv_forward(self, input, weight, bias, *args, **kwargs): + if NVIDIA_MEMORY_CONV_BUG_WORKAROUND and weight.dtype in (torch.float16, torch.bfloat16): + out = torch.cudnn_convolution(input, weight, self.padding, self.stride, self.dilation, self.groups, benchmark=False, deterministic=False, allow_tf32=True) + if bias is not None: + out += bias.reshape((1, -1) + (1,) * (out.ndim - 2)) + return out + else: + return super()._conv_forward(input, weight, bias, *args, **kwargs) + def forward_comfy_cast_weights(self, input): weight, bias = cast_bias_weight(self, input) return self._conv_forward(input, weight, bias)