add --high-ram option (#14437)

Add this option for users who know they have so much RAM that they want to pin everything, or who have a pagefile that outruns their disk speed. This removes the RAM pressure caps completely and pins behind the primary model load, forcing all models to be permanently committed to RAM.
2026-06-15 12:29:33 +08:00 · 2026-06-13 00:53:33 +10:00 · 2026-06-13 00:53:33 +10:00 · d7a552720c
commit d7a552720c
parent 02656ea0bb
3 changed files with 9 additions and 1 deletions
--- a/comfy/cli_args.py
+++ b/comfy/cli_args.py
@ -115,6 +115,7 @@ cache_group.add_argument("--cache-ram", nargs='*', type=float, default=[], metav
 cache_group.add_argument("--cache-classic", action="store_true", help="Use the old style (aggressive) caching.")
 cache_group.add_argument("--cache-lru", type=int, default=0, help="Use LRU caching with a maximum of N node results cached. May use more RAM/VRAM.")
 cache_group.add_argument("--cache-none", action="store_true", help="Reduced RAM/VRAM usage at the expense of executing every node for each run.")
+cache_group.add_argument("--high-ram", action="store_true", help="Can improve performance slightly on high RAM or on systems where pagefile use is preferred over model loading.")

 attn_group = parser.add_mutually_exclusive_group()
 attn_group.add_argument("--use-split-cross-attention", action="store_true", help="Use the split cross attention optimization. Ignored when xformers is used.")
@ -249,6 +250,9 @@ else:
 if args.cache_ram is not None and len(args.cache_ram) > 2:
    parser.error("--cache-ram accepts at most two values: active GB and inactive GB")

+if args.high_ram:
+    args.cache_classic = True
+
 if args.windows_standalone_build:
    args.auto_launch = True

--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@ -643,6 +643,8 @@ def free_pins(size, evict_active=False):
    return freed_total

 def ensure_pin_budget(size, evict_active=False):
+    if args.high_ram:
+        return True
    if args.fast_disk:
        shortfall = TOTAL_PINNED_MEMORY + size - MAX_PINNED_MEMORY
    else:
@ -1496,6 +1498,8 @@ if not args.disable_pinned_memory:
 PINNING_ALLOWED_TYPES = set(["Tensor", "Parameter", "QuantizedTensor"])

 def pinned_hostbuf_size(size):
+    if args.high_ram:
+        return max(0, int(size * 2))
    return max(0, int(min(size, MAX_PINNED_MEMORY) * 2))

 def discard_cuda_async_error():
--- a/comfy/ops.py
+++ b/comfy/ops.py
@ -180,7 +180,7 @@ def cast_modules_with_vbar(comfy_modules, dtype, device, bias_dtype, non_blockin
            if pin is not None:
                cast_maybe_lowvram_patch([pin], dest, offload_stream)
                return
-            if signature is None:
+            if signature is None or args.high_ram:
                comfy.pinned_memory.pin_memory(m, subset=subset, size=size)
                pin = comfy.pinned_memory.get_pin(m, subset=subset)
            cast_maybe_lowvram_patch(source, pin, offload_stream, xfer_dest2=dest)