From e1d85e7577d8f6355bd4cb3449bcb0a7e5f80cb8 Mon Sep 17 00:00:00 2001
From: Qiacheng Li <choliaky@gmail.com>
Date: Wed, 12 Nov 2025 12:21:05 -0800
Subject: [PATCH 1/3] Update README.md for Intel Arc GPU installation, remove
 IPEX (#10729)

IPEX is no longer needed for Intel Arc GPUs.  Removing instruction to setup ipex.
---
 README.md | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 8142f595b..9e28803a2 100644
--- a/README.md
+++ b/README.md
@@ -242,7 +242,7 @@ RDNA 4 (RX 9000 series):
 
 ### Intel GPUs (Windows and Linux)
 
-(Option 1) Intel Arc GPU users can install native PyTorch with torch.xpu support using pip. More information can be found [here](https://pytorch.org/docs/main/notes/get_start_xpu.html)
+Intel Arc GPU users can install native PyTorch with torch.xpu support using pip. More information can be found [here](https://pytorch.org/docs/main/notes/get_start_xpu.html)
 
 1. To install PyTorch xpu, use the following command:
 
@@ -252,10 +252,6 @@ This is the command to install the Pytorch xpu nightly which might have some per
 
 ```pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/xpu```
 
-(Option 2) Alternatively, Intel GPUs supported by Intel Extension for PyTorch (IPEX) can leverage IPEX for improved performance.
-
-1. visit [Installation](https://intel.github.io/intel-extension-for-pytorch/index.html#installation?platform=gpu) for more information.
-
 ### NVIDIA
 
 Nvidia users should install stable pytorch using this command:

From 18e7d6dba5f1012d4cf09e8f777dc85d56ff25c0 Mon Sep 17 00:00:00 2001
From: rattus <46076784+rattus128@users.noreply.github.com>
Date: Thu, 13 Nov 2025 07:19:53 +1000
Subject: [PATCH 2/3] mm/mp: always unload re-used but modified models (#10724)

The partial unloader path in model re-use flow skips straight to the
actual unload without any check of the patching UUID. This means that
if you do an upscale flow with a model patch on an existing model, it
will not apply your patchings.

Fix by delaying the partial_unload until after the uuid checks. This
is done by making partial_unload a model of partial_load where extra_mem
is -ve.
---
 comfy/model_management.py | 5 +----
 comfy/model_patcher.py    | 3 +++
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/comfy/model_management.py b/comfy/model_management.py
index d8913082a..a21df54b3 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -503,10 +503,7 @@ class LoadedModel:
         use_more_vram = lowvram_model_memory
         if use_more_vram == 0:
             use_more_vram = 1e32
-        if use_more_vram > 0:
-            self.model_use_more_vram(use_more_vram, force_patch_weights=force_patch_weights)
-        else:
-            self.model.partially_unload(self.model.offload_device, -use_more_vram, force_patch_weights=force_patch_weights)
+        self.model_use_more_vram(use_more_vram, force_patch_weights=force_patch_weights)
 
         real_model = self.model.model
 
diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py
index 68b0a9192..cf1b0d441 100644
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@@ -928,6 +928,9 @@ class ModelPatcher:
                 extra_memory += (used - self.model.model_loaded_weight_memory)
 
             self.patch_model(load_weights=False)
+            if extra_memory < 0 and not unpatch_weights:
+                self.partially_unload(self.offload_device, -extra_memory, force_patch_weights=force_patch_weights)
+                return 0
             full_load = False
             if self.model.model_lowvram == False and self.model.model_loaded_weight_memory > 0:
                 self.apply_hooks(self.forced_hooks, force_apply=True)

From 1c7eaeca1013e4315f36e0d4d274faa106001121 Mon Sep 17 00:00:00 2001
From: rattus <46076784+rattus128@users.noreply.github.com>
Date: Thu, 13 Nov 2025 07:20:53 +1000
Subject: [PATCH 3/3] qwen: reduce VRAM usage (#10725)

Clean up a bunch of stacked and no-longer-needed tensors on the QWEN
VRAM peak (currently FFN).

With this I go from OOMing at B=37x1328x1328 to being able to
succesfully run B=47 (RTX5090).
---
 comfy/ldm/qwen_image/model.py | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/comfy/ldm/qwen_image/model.py b/comfy/ldm/qwen_image/model.py
index e5d0d17c1..427ea19c1 100644
--- a/comfy/ldm/qwen_image/model.py
+++ b/comfy/ldm/qwen_image/model.py
@@ -236,10 +236,10 @@ class QwenImageTransformerBlock(nn.Module):
         img_mod1, img_mod2 = img_mod_params.chunk(2, dim=-1)
         txt_mod1, txt_mod2 = txt_mod_params.chunk(2, dim=-1)
 
-        img_normed = self.img_norm1(hidden_states)
-        img_modulated, img_gate1 = self._modulate(img_normed, img_mod1)
-        txt_normed = self.txt_norm1(encoder_hidden_states)
-        txt_modulated, txt_gate1 = self._modulate(txt_normed, txt_mod1)
+        img_modulated, img_gate1 = self._modulate(self.img_norm1(hidden_states), img_mod1)
+        del img_mod1
+        txt_modulated, txt_gate1 = self._modulate(self.txt_norm1(encoder_hidden_states), txt_mod1)
+        del txt_mod1
 
         img_attn_output, txt_attn_output = self.attn(
             hidden_states=img_modulated,
@@ -248,16 +248,20 @@ class QwenImageTransformerBlock(nn.Module):
             image_rotary_emb=image_rotary_emb,
             transformer_options=transformer_options,
         )
+        del img_modulated
+        del txt_modulated
 
         hidden_states = hidden_states + img_gate1 * img_attn_output
         encoder_hidden_states = encoder_hidden_states + txt_gate1 * txt_attn_output
+        del img_attn_output
+        del txt_attn_output
+        del img_gate1
+        del txt_gate1
 
-        img_normed2 = self.img_norm2(hidden_states)
-        img_modulated2, img_gate2 = self._modulate(img_normed2, img_mod2)
+        img_modulated2, img_gate2 = self._modulate(self.img_norm2(hidden_states), img_mod2)
         hidden_states = torch.addcmul(hidden_states, img_gate2, self.img_mlp(img_modulated2))
 
-        txt_normed2 = self.txt_norm2(encoder_hidden_states)
-        txt_modulated2, txt_gate2 = self._modulate(txt_normed2, txt_mod2)
+        txt_modulated2, txt_gate2 = self._modulate(self.txt_norm2(encoder_hidden_states), txt_mod2)
         encoder_hidden_states = torch.addcmul(encoder_hidden_states, txt_gate2, self.txt_mlp(txt_modulated2))
 
         return encoder_hidden_states, hidden_states