Enable async offloading by default on Nvidia. (#10953)

Add --disable-async-offload to disable it. If this causes OOMs that go away when you pass --disable-async-offload, please report them.
2026-06-30 03:39:37 +08:00 · 2025-11-27 14:46:12 -08:00 · 2025-11-27 14:46:12 -08:00 · 9d8a817985
commit 9d8a817985
parent b59750a86a
2 changed files with 13 additions and 3 deletions
--- a/comfy/cli_args.py
+++ b/comfy/cli_args.py
@ -131,7 +131,8 @@ vram_group.add_argument("--cpu", action="store_true", help="To use the CPU for e

 parser.add_argument("--reserve-vram", type=float, default=None, help="Set the amount of vram in GB you want to reserve for use by your OS/other software. By default some amount is reserved depending on your OS.")

-parser.add_argument("--async-offload", action="store_true", help="Use async weight offloading.")
+parser.add_argument("--async-offload", nargs='?', const=2, type=int, default=None, metavar="NUM_STREAMS", help="Use async weight offloading. An optional argument controls the amount of offload streams. Default is 2. Enabled by default on Nvidia.")
+parser.add_argument("--disable-async-offload", action="store_true", help="Disable async weight offloading.")

 parser.add_argument("--force-non-blocking", action="store_true", help="Force ComfyUI to use non-blocking operations for all applicable tensors. This may improve performance on some non-Nvidia systems but can cause issues with some workflows.")

--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@ -1013,8 +1013,17 @@ def force_channels_last():

 STREAMS = {}
 NUM_STREAMS = 0
-if args.async_offload:
-    NUM_STREAMS = 2
+if args.async_offload is not None:
+    NUM_STREAMS = args.async_offload
+else:
+    #  Enable by default on Nvidia
+    if is_nvidia():
+        NUM_STREAMS = 2
+
+if args.disable_async_offload:
+    NUM_STREAMS = 0
+
+if NUM_STREAMS > 0:
    logging.info("Using async weight offloading with {} streams".format(NUM_STREAMS))

 def current_stream(device):