From d8b442709a5607a3f82bcf5a03f2f81b1cdacadc Mon Sep 17 00:00:00 2001
From: Rattus
Date: Wed, 13 May 2026 22:23:37 +1000
Subject: [PATCH] make default

---
 comfy/cli_args.py |  2 +-
 main.py           | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/comfy/cli_args.py b/comfy/cli_args.py
index e0d7d4af4..d5d13008b 100644
--- a/comfy/cli_args.py
+++ b/comfy/cli_args.py
@@ -111,10 +111,10 @@ parser.add_argument("--preview-method", type=LatentPreviewMethod, default=Latent
 parser.add_argument("--preview-size", type=int, default=512, help="Sets the maximum preview size for sampler nodes.")
 
 cache_group = parser.add_mutually_exclusive_group()
+cache_group.add_argument("--cache-ram", nargs='*', type=float, default=[], metavar="GB", help="Use RAM pressure caching with the specified headroom thresholds. This is the default caching mode. The first value sets the active-cache threshold; the optional second value sets the inactive-cache/pin threshold. Defaults when no values are provided: active 25%% of system RAM (min 4GB, max 32GB), inactive 75%% of system RAM (min 12GB, max 96GB).")
 cache_group.add_argument("--cache-classic", action="store_true", help="Use the old style (aggressive) caching.")
 cache_group.add_argument("--cache-lru", type=int, default=0, help="Use LRU caching with a maximum of N node results cached. May use more RAM/VRAM.")
 cache_group.add_argument("--cache-none", action="store_true", help="Reduced RAM/VRAM usage at the expense of executing every node for each run.")
-cache_group.add_argument("--cache-ram", nargs='*', type=float, default=None, metavar="GB", help="Use RAM pressure caching with the specified headroom thresholds. The first value sets the active-cache threshold; the optional second value sets the inactive-cache/pin threshold. Defaults when no values are provided: active 25%% of system RAM (min 4GB, max 32GB), inactive 75%% of system RAM (min 12GB, max 96GB).")
 
 attn_group = parser.add_mutually_exclusive_group()
 attn_group.add_argument("--use-split-cross-attention", action="store_true", help="Use the split cross attention optimization. Ignored when xformers is used.")
diff --git a/main.py b/main.py
index ad9742252..1e47cab84 100644
--- a/main.py
+++ b/main.py
@@ -285,7 +285,7 @@ def prompt_worker(q, server_instance):
     current_time: float = 0.0
     cache_ram = 0
     cache_ram_inactive = 0
-    if args.cache_ram is not None:
+    if not args.cache_classic and not args.cache_none and args.cache_lru <= 0:
         cache_ram = min(32.0, max(4.0, comfy.model_management.total_ram * 0.25 / 1024.0))
         cache_ram_inactive = min(96.0, max(12.0, comfy.model_management.total_ram * 0.75 / 1024.0))
         if len(args.cache_ram) > 0:
@@ -293,11 +293,11 @@
         if len(args.cache_ram) > 1:
             cache_ram_inactive = args.cache_ram[1]
 
-    cache_type = execution.CacheType.CLASSIC
-    if args.cache_lru > 0:
+    cache_type = execution.CacheType.RAM_PRESSURE
+    if args.cache_classic:
+        cache_type = execution.CacheType.CLASSIC
+    elif args.cache_lru > 0:
         cache_type = execution.CacheType.LRU
-    elif max(cache_ram, cache_ram_inactive) > 0:
-        cache_type = execution.CacheType.RAM_PRESSURE
 elif args.cache_none:
         cache_type = execution.CacheType.NONE
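
Note (sketch, not part of the patch): the standalone Python below mirrors the cache selection the patched prompt_worker() ends up with, to show that RAM pressure caching is now the default and the other flags opt out of it. SimpleNamespace and the total_ram_mb parameter stand in for the real args namespace and comfy.model_management.total_ram, and the args.cache_ram[0] assignment assumed to sit between the two main.py hunks is inferred, not quoted.

    # Sketch of the post-patch selection logic, under the assumptions above.
    from types import SimpleNamespace

    def resolve_cache_settings(args, total_ram_mb):
        """Illustrative mirror of the patched prompt_worker() branches."""
        cache_ram = 0
        cache_ram_inactive = 0
        if not args.cache_classic and not args.cache_none and args.cache_lru <= 0:
            # Default headroom: active 25% of RAM clamped to 4-32GB,
            # inactive 75% of RAM clamped to 12-96GB; --cache-ram values override.
            cache_ram = min(32.0, max(4.0, total_ram_mb * 0.25 / 1024.0))
            cache_ram_inactive = min(96.0, max(12.0, total_ram_mb * 0.75 / 1024.0))
            if len(args.cache_ram) > 0:
                cache_ram = args.cache_ram[0]      # inferred line between the hunks
            if len(args.cache_ram) > 1:
                cache_ram_inactive = args.cache_ram[1]

        cache_type = "RAM_PRESSURE"                # new default
        if args.cache_classic:
            cache_type = "CLASSIC"
        elif args.cache_lru > 0:
            cache_type = "LRU"
        elif args.cache_none:
            cache_type = "NONE"
        return cache_type, cache_ram, cache_ram_inactive

    # e.g. 64GB system with no cache flags -> ('RAM_PRESSURE', 16.0, 48.0)
    print(resolve_cache_settings(
        SimpleNamespace(cache_classic=False, cache_none=False, cache_lru=0, cache_ram=[]),
        total_ram_mb=64 * 1024))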