From 0a66d4b0afe4a78a200809b7d1d3beec6c6a2a8f Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Tue, 29 Apr 2025 17:28:52 -0700
Subject: [PATCH 1/3] Per device stream counters for async offload. (#7873)

---
 comfy/model_management.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/comfy/model_management.py b/comfy/model_management.py
index 78317af3c..44aff3762 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -946,9 +946,9 @@ if args.async_offload:
     NUM_STREAMS = 2
     logging.info("Using async weight offloading with {} streams".format(NUM_STREAMS))
 
-stream_counter = 0
+stream_counters = {}
 def get_offload_stream(device):
-    global stream_counter
+    stream_counter = stream_counters.get(device, 0)
     if NUM_STREAMS <= 1:
         return None
 
@@ -958,6 +958,7 @@ def get_offload_stream(device):
         stream_counter = (stream_counter + 1) % len(ss)
         if is_device_cuda(device):
             ss[stream_counter].wait_stream(torch.cuda.current_stream())
+        stream_counters[device] = stream_counter
         return s
     elif is_device_cuda(device):
         ss = []
@@ -966,6 +967,7 @@ def get_offload_stream(device):
         STREAMS[device] = ss
         s = ss[stream_counter]
         stream_counter = (stream_counter + 1) % len(ss)
+        stream_counters[device] = stream_counter
         return s
     return None
 

From 7ee96455e2ed29293aa6076db9b4866862d41142 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Tue, 29 Apr 2025 17:38:45 -0700
Subject: [PATCH 2/3] Bump minimum pyav version to 14.2.0 (#7874)

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 10cc177af..f64a05947 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -22,5 +22,5 @@ psutil
 kornia>=0.7.1
 spandrel
 soundfile
-av>=14.1.0
+av>=14.2.0
 pydantic~=2.0

From dbc726f80c9ac0512d2611fad63d984b8c03886f Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Tue, 29 Apr 2025 17:42:00 -0700
Subject: [PATCH 3/3] Better vace memory estimation. (#7875)

---
 comfy/ldm/wan/model.py    | 1 +
 comfy/supported_models.py | 4 ++++
 2 files changed, 5 insertions(+)

diff --git a/comfy/ldm/wan/model.py b/comfy/ldm/wan/model.py
index b8eec3afb..66bee7480 100644
--- a/comfy/ldm/wan/model.py
+++ b/comfy/ldm/wan/model.py
@@ -631,6 +631,7 @@ class VaceWanModel(WanModel):
             if ii is not None:
                 c_skip, c = self.vace_blocks[ii](c, x=x_orig, e=e0, freqs=freqs, context=context, context_img_len=context_img_len)
                 x += c_skip * vace_strength
+                del c_skip
         # head
         x = self.head(x, e)
 
diff --git a/comfy/supported_models.py b/comfy/supported_models.py
index 5e55035cf..69bcee1f7 100644
--- a/comfy/supported_models.py
+++ b/comfy/supported_models.py
@@ -993,6 +993,10 @@ class WAN21_Vace(WAN21_T2V):
         "model_type": "vace",
     }
 
+    def __init__(self, unet_config):
+        super().__init__(unet_config)
+        self.memory_usage_factor = 1.2 * self.memory_usage_factor
+
     def get_model(self, state_dict, prefix="", device=None):
         out = model_base.WAN21_Vace(self, image_to_video=False, device=device)
         return out