From 0a66d4b0afe4a78a200809b7d1d3beec6c6a2a8f Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Tue, 29 Apr 2025 17:28:52 -0700
Subject: [PATCH 1/3] Per device stream counters for async offload. (#7873)

---
 comfy/model_management.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/comfy/model_management.py b/comfy/model_management.py
index 78317af3c..44aff3762 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -946,9 +946,9 @@ if args.async_offload:
     NUM_STREAMS = 2
     logging.info("Using async weight offloading with {} streams".format(NUM_STREAMS))
 
-stream_counter = 0
+stream_counters = {}
 def get_offload_stream(device):
-    global stream_counter
+    stream_counter = stream_counters.get(device, 0)
     if NUM_STREAMS <= 1:
         return None
 
@@ -958,6 +958,7 @@ def get_offload_stream(device):
         stream_counter = (stream_counter + 1) % len(ss)
         if is_device_cuda(device):
             ss[stream_counter].wait_stream(torch.cuda.current_stream())
+        stream_counters[device] = stream_counter
         return s
     elif is_device_cuda(device):
         ss = []
@@ -966,6 +967,7 @@ def get_offload_stream(device):
         STREAMS[device] = ss
         s = ss[stream_counter]
         stream_counter = (stream_counter + 1) % len(ss)
+        stream_counters[device] = stream_counter
         return s
     return None
 

From 7ee96455e2ed29293aa6076db9b4866862d41142 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Tue, 29 Apr 2025 17:38:45 -0700
Subject: [PATCH 2/3] Bump minimum pyav version to 14.2.0 (#7874)

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 10cc177af..f64a05947 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -22,5 +22,5 @@ psutil
 kornia>=0.7.1
 spandrel
 soundfile
-av>=14.1.0
+av>=14.2.0
 pydantic~=2.0

From dbc726f80c9ac0512d2611fad63d984b8c03886f Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Tue, 29 Apr 2025 17:42:00 -0700
Subject: [PATCH 3/3] Better vace memory estimation. (#7875)

---
 comfy/ldm/wan/model.py    | 1 +
 comfy/supported_models.py | 4 ++++
 2 files changed, 5 insertions(+)

diff --git a/comfy/ldm/wan/model.py b/comfy/ldm/wan/model.py
index b8eec3afb..66bee7480 100644
--- a/comfy/ldm/wan/model.py
+++ b/comfy/ldm/wan/model.py
@@ -631,6 +631,7 @@ class VaceWanModel(WanModel):
             if ii is not None:
                 c_skip, c = self.vace_blocks[ii](c, x=x_orig, e=e0, freqs=freqs, context=context, context_img_len=context_img_len)
                 x += c_skip * vace_strength
+                del c_skip
         # head
         x = self.head(x, e)
 
diff --git a/comfy/supported_models.py b/comfy/supported_models.py
index 5e55035cf..69bcee1f7 100644
--- a/comfy/supported_models.py
+++ b/comfy/supported_models.py
@@ -993,6 +993,10 @@ class WAN21_Vace(WAN21_T2V):
         "model_type": "vace",
     }
 
+    def __init__(self, unet_config):
+        super().__init__(unet_config)
+        self.memory_usage_factor = 1.2 * self.memory_usage_factor
+
     def get_model(self, state_dict, prefix="", device=None):
         out = model_base.WAN21_Vace(self, image_to_video=False, device=device)
         return out