From 9d8a817985bb069685e440b38762f95dc834d242 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Thu, 27 Nov 2025 14:46:12 -0800
Subject: [PATCH] Enable async offloading by default on Nvidia. (#10953)

Add --disable-async-offload to disable it.

If this causes OOMs that go away when you --disable-async-offload please
report it.
---
 comfy/cli_args.py         |  3 ++-
 comfy/model_management.py | 13 +++++++++++--
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/comfy/cli_args.py b/comfy/cli_args.py
index d2b60e347..5f0dfaa10 100644
--- a/comfy/cli_args.py
+++ b/comfy/cli_args.py
@@ -131,7 +131,8 @@ vram_group.add_argument("--cpu", action="store_true", help="To use the CPU for e
 
 parser.add_argument("--reserve-vram", type=float, default=None, help="Set the amount of vram in GB you want to reserve for use by your OS/other software. By default some amount is reserved depending on your OS.")
 
-parser.add_argument("--async-offload", action="store_true", help="Use async weight offloading.")
+parser.add_argument("--async-offload", nargs='?', const=2, type=int, default=None, metavar="NUM_STREAMS", help="Use async weight offloading. An optional argument controls the amount of offload streams. Default is 2. Enabled by default on Nvidia.")
+parser.add_argument("--disable-async-offload", action="store_true", help="Disable async weight offloading.")
 
 parser.add_argument("--force-non-blocking", action="store_true", help="Force ComfyUI to use non-blocking operations for all applicable tensors. This may improve performance on some non-Nvidia systems but can cause issues with some workflows.")
 
diff --git a/comfy/model_management.py b/comfy/model_management.py
index 9c403d580..38c506df5 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -1013,8 +1013,17 @@ def force_channels_last():
 
 STREAMS = {}
 NUM_STREAMS = 0
-if args.async_offload:
-    NUM_STREAMS = 2
+if args.async_offload is not None:
+    NUM_STREAMS = args.async_offload
+else:
+    # Enable by default on Nvidia
+    if is_nvidia():
+        NUM_STREAMS = 2
+
+if args.disable_async_offload:
+    NUM_STREAMS = 0
+
+if NUM_STREAMS > 0:
     logging.info("Using async weight offloading with {} streams".format(NUM_STREAMS))
 
 def current_stream(device):
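Note (not part of the patch): the sketch below is a minimal standalone illustration of how the new flag combination resolves to a stream count, under the assumption that is_nvidia() is stubbed to return True; in comfy/model_management.py that helper inspects the active torch device, and the real code uses the module-level NUM_STREAMS rather than a local variable.

    # Standalone illustration of the flag semantics introduced by this patch.
    # is_nvidia() is a stub here; the real helper lives in comfy/model_management.py.
    import argparse

    def is_nvidia():
        return True  # assume an Nvidia GPU for this sketch

    parser = argparse.ArgumentParser()
    # nargs='?' with const=2: bare "--async-offload" means 2 streams,
    # "--async-offload 4" means 4, and omitting the flag leaves it None.
    parser.add_argument("--async-offload", nargs='?', const=2, type=int, default=None, metavar="NUM_STREAMS")
    parser.add_argument("--disable-async-offload", action="store_true")

    for argv in ([], ["--async-offload"], ["--async-offload", "4"], ["--disable-async-offload"]):
        args = parser.parse_args(argv)

        # Same resolution order as the patched model_management.py:
        # an explicit value wins, otherwise Nvidia defaults to 2, and
        # --disable-async-offload overrides everything back to 0.
        if args.async_offload is not None:
            num_streams = args.async_offload
        elif is_nvidia():
            num_streams = 2
        else:
            num_streams = 0
        if args.disable_async_offload:
            num_streams = 0

        print(argv, "->", num_streams)

Running it prints 2 for no arguments (the new Nvidia default), 2 for a bare --async-offload, 4 for --async-offload 4, and 0 for --disable-async-offload.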