From a125cd84b054a57729b5eecab930ca9408719832 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Sat, 11 Oct 2025 21:28:01 -0700
Subject: [PATCH] Improve AMD performance. (#10302)

I honestly have no idea why this improves things but it does.
---
 comfy/model_management.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/comfy/model_management.py b/comfy/model_management.py
index c5b817b62..146c00925 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -332,6 +332,7 @@ except:
 SUPPORT_FP8_OPS = args.supports_fp8_compute
 try:
     if is_amd():
+        torch.backends.cudnn.enabled = False # Seems to improve things a lot on AMD
         try:
             rocm_version = tuple(map(int, str(torch.version.hip).split(".")[:2]))
         except:
@@ -925,11 +926,7 @@ def vae_dtype(device=None, allowed_dtypes=[]):
         if d == torch.float16 and should_use_fp16(device):
             return d

-        # NOTE: bfloat16 seems to work on AMD for the VAE but is extremely slow in some cases compared to fp32
-        # slowness still a problem on pytorch nightly 2.9.0.dev20250720+rocm6.4 tested on RDNA3
-        # also a problem on RDNA4 except fp32 is also slow there.
-        # This is due to large bf16 convolutions being extremely slow.
-        if d == torch.bfloat16 and ((not is_amd()) or amd_min_version(device, min_rdna_version=4)) and should_use_bf16(device):
+        if d == torch.bfloat16 and should_use_bf16(device):
             return d

     return torch.float32
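
A minimal sketch of one way to check what the added line changes on a given GPU: time a large convolution with torch.backends.cudnn.enabled toggled on and off, in fp32 and bf16, since the removed comments attribute the slowdown to large bf16 convolutions. The channel counts, spatial size, and iteration count below are illustrative assumptions, not values taken from ComfyUI's VAE.

import time

import torch


def time_conv(dtype, cudnn_enabled, iters=20):
    # Toggle the cuDNN backend (MIOpen on ROCm builds), as the patch does for AMD.
    torch.backends.cudnn.enabled = cudnn_enabled
    device = torch.device("cuda")  # ROCm builds of PyTorch also expose the "cuda" device
    # Illustrative VAE-decoder-like convolution; shapes are assumptions, not ComfyUI values.
    conv = torch.nn.Conv2d(256, 256, kernel_size=3, padding=1).to(device, dtype)
    x = torch.randn(1, 256, 512, 512, device=device, dtype=dtype)
    with torch.inference_mode():
        for _ in range(3):  # warm up so kernel selection is not measured
            conv(x)
        torch.cuda.synchronize()
        start = time.perf_counter()
        for _ in range(iters):
            conv(x)
        torch.cuda.synchronize()
    return (time.perf_counter() - start) / iters


if __name__ == "__main__":
    for dtype in (torch.float32, torch.bfloat16):
        for enabled in (True, False):
            ms = time_conv(dtype, enabled) * 1000
            print(f"{dtype} cudnn_enabled={enabled}: {ms:.2f} ms/iter")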