From c8674bc6e9c0762e9fabe0e7f2762d5c36700963 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Mon, 13 Oct 2025 18:19:03 -0700
Subject: [PATCH 1/3] Enable RDNA4 pytorch attention on ROCm 7.0 and up.
 (#10332)

---
 comfy/model_management.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/comfy/model_management.py b/comfy/model_management.py
index 146c00925..709ebc40b 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -345,9 +345,9 @@ try:
                 if torch_version_numeric >= (2, 7):  # works on 2.6 but doesn't actually seem to improve much
                     if any((a in arch) for a in ["gfx90a", "gfx942", "gfx1100", "gfx1101", "gfx1151"]):  # TODO: more arches, TODO: gfx950
                         ENABLE_PYTORCH_ATTENTION = True
-#                if torch_version_numeric >= (2, 8):
-#                    if any((a in arch) for a in ["gfx1201"]):
-#                        ENABLE_PYTORCH_ATTENTION = True
+                if rocm_version >= (7, 0):  # gfx1201 gate now keys on the ROCm version rather than the torch version
+                    if any((a in arch) for a in ["gfx1201"]):
+                        ENABLE_PYTORCH_ATTENTION = True
         if torch_version_numeric >= (2, 7) and rocm_version >= (6, 4):
             if any((a in arch) for a in ["gfx1200", "gfx1201", "gfx942", "gfx950"]):  # TODO: more arches
                 SUPPORT_FP8_OPS = True

From e4ea3936660a8f8dfa2467e51631362b04ad47e8 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Mon, 13 Oct 2025 19:18:58 -0700
Subject: [PATCH 2/3] Fix loading old stable diffusion ckpt files on newer
 numpy. (#10333)

---
 comfy/utils.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/comfy/utils.py b/comfy/utils.py
index fab28cf08..0fd03f165 100644
--- a/comfy/utils.py
+++ b/comfy/utils.py
@@ -39,7 +39,11 @@ if hasattr(torch.serialization, "add_safe_globals"):  # TODO: this was added in
         pass
     ModelCheckpoint.__module__ = "pytorch_lightning.callbacks.model_checkpoint"
 
-    from numpy.core.multiarray import scalar
+    def scalar(*args, **kwargs):  # lazy stand-in: numpy.core.multiarray is imported only when this is called
+        from numpy.core.multiarray import scalar as sc
+        return sc(*args, **kwargs)
+    scalar.__module__ = "numpy.core.multiarray"  # report the wrapped function's module path instead of this file's
+
     from numpy import dtype
     from numpy.dtypes import Float64DType
     from _codecs import encode

From dfff7e5332530b7278c1f90c51aed525db53489e Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Mon, 13 Oct 2025 19:37:19 -0700
Subject: [PATCH 3/3] Better memory estimation for the SD/Flux VAE on AMD.
 (#10334)

---
 comfy/sd.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/comfy/sd.py b/comfy/sd.py
index b9c2e995e..28bee248d 100644
--- a/comfy/sd.py
+++ b/comfy/sd.py
@@ -276,8 +276,13 @@ class VAE:
         if 'decoder.up_blocks.0.resnets.0.norm1.weight' in sd.keys(): #diffusers format
             sd = diffusers_convert.convert_vae_state_dict(sd)
 
-        self.memory_used_encode = lambda shape, dtype: (1767 * shape[2] * shape[3]) * model_management.dtype_size(dtype) #These are for AutoencoderKL and need tweaking (should be lower)
-        self.memory_used_decode = lambda shape, dtype: (2178 * shape[2] * shape[3] * 64) * model_management.dtype_size(dtype)
+        if model_management.is_amd():
+            VAE_KL_MEM_RATIO = 2.73  # AMD: both AutoencoderKL estimates below are scaled up by this factor
+        else:
+            VAE_KL_MEM_RATIO = 1.0  # NOTE(review): multiplying by 1.0 makes the estimate a float - confirm callers accept that
+
+        self.memory_used_encode = lambda shape, dtype: (1767 * shape[2] * shape[3]) * model_management.dtype_size(dtype) * VAE_KL_MEM_RATIO #These are for AutoencoderKL and need tweaking (should be lower)
+        self.memory_used_decode = lambda shape, dtype: (2178 * shape[2] * shape[3] * 64) * model_management.dtype_size(dtype) * VAE_KL_MEM_RATIO
         self.downscale_ratio = 8
         self.upscale_ratio = 8
         self.latent_channels = 4