From b30e980a206607d1a9d56b7a6f7df3999d68438a Mon Sep 17 00:00:00 2001
From: rattus <46076784+rattus128@users.noreply.github.com>
Date: Mon, 25 May 2026 08:26:50 +1000
Subject: [PATCH] cache-ram: lower thresholds (#14089)

Use the RAM right up to the wire as the community is bit accustomed too.

This trades off headroom for the case where large chunky intermediates
arrive and potenitally hits pagefile/swap, but a lot of people have
"it just fits" workflows out there, so strike a compromise with
75->90%.

Disable the incative cache for all but the very high RAM users.
---
 comfy/cli_args.py | 2 +-
 main.py           | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/comfy/cli_args.py b/comfy/cli_args.py
index 9d88c8517..47b8174f4 100644
--- a/comfy/cli_args.py
+++ b/comfy/cli_args.py
@@ -111,7 +111,7 @@ parser.add_argument("--preview-method", type=LatentPreviewMethod, default=Latent
 parser.add_argument("--preview-size", type=int, default=512, help="Sets the maximum preview size for sampler nodes.")
 
 cache_group = parser.add_mutually_exclusive_group()
-cache_group.add_argument("--cache-ram", nargs='*', type=float, default=[], metavar="GB", help="Use RAM pressure caching with the specified headroom thresholds. This is the default caching mode. The first value sets the active-cache threshold; the optional second value sets the inactive-cache/pin threshold. Defaults when no values are provided: active 25%% of system RAM (min 4GB, max 32GB), inactive 75%% of system RAM (min 12GB, max 96GB).")
+cache_group.add_argument("--cache-ram", nargs='*', type=float, default=[], metavar="GB", help="Use RAM pressure caching with the specified headroom thresholds. This is the default caching mode. The first value sets the active-cache threshold; the optional second value sets the inactive-cache/pin threshold. Defaults when no values are provided: active 10%% of system RAM (min 2GB, max 10GB), inactive 100%% of system RAM (max 96GB).")
 cache_group.add_argument("--cache-classic", action="store_true", help="Use the old style (aggressive) caching.")
 cache_group.add_argument("--cache-lru", type=int, default=0, help="Use LRU caching with a maximum of N node results cached. May use more RAM/VRAM.")
 cache_group.add_argument("--cache-none", action="store_true", help="Reduced RAM/VRAM usage at the expense of executing every node for each run.")
diff --git a/main.py b/main.py
index 1e47cab84..f23074942 100644
--- a/main.py
+++ b/main.py
@@ -286,8 +286,8 @@ def prompt_worker(q, server_instance):
     cache_ram = 0
     cache_ram_inactive = 0
     if not args.cache_classic and not args.cache_none and args.cache_lru <= 0:
-        cache_ram = min(32.0, max(4.0, comfy.model_management.total_ram * 0.25 / 1024.0))
-        cache_ram_inactive = min(96.0, max(12.0, comfy.model_management.total_ram * 0.75 / 1024.0))
+        cache_ram = min(10.0, max(2.0, comfy.model_management.total_ram * 0.10 / 1024.0))
+        cache_ram_inactive = min(96.0, comfy.model_management.total_ram / 1024.0)
         if len(args.cache_ram) > 0:
             cache_ram = args.cache_ram[0]
         if len(args.cache_ram) > 1: