caching: Remove model awareness from RAM caching

Model RAM pressure is now implemented via the DynamicVRAM system.
Rattus 2026-03-25 20:18:13 +10:00
parent 2a1f402601
commit 56fe08648c
3 changed files with 4 additions and 17 deletions

View File

@@ -300,9 +300,6 @@ class ModelPatcher:
     def model_mmap_residency(self, free=False):
         return comfy.model_management.module_mmap_residency(self.model, free=free)
 
-    def get_ram_usage(self):
-        return self.model_size()
-
     def loaded_size(self):
         return self.model.model_loaded_weight_memory

View File

@@ -280,9 +280,6 @@ class CLIP:
         n.apply_hooks_to_conds = self.apply_hooks_to_conds
         return n
 
-    def get_ram_usage(self):
-        return self.patcher.get_ram_usage()
-
     def add_patches(self, patches, strength_patch=1.0, strength_model=1.0):
         return self.patcher.add_patches(patches, strength_patch, strength_model)
@@ -840,9 +837,6 @@ class VAE:
         self.size = comfy.model_management.module_size(self.first_stage_model)
         return self.size
 
-    def get_ram_usage(self):
-        return self.model_size()
-
     def throw_exception_if_invalid(self):
         if self.first_stage_model is None:
             raise RuntimeError("ERROR: VAE is invalid: None\n\nIf the VAE is from a checkpoint loader node your checkpoint does not contain a valid VAE.")

View File

@@ -494,10 +494,10 @@ class LRUCache(BasicCache):
 RAM_CACHE_HYSTERESIS = 1.1
-#This is kinda in GB but not really. It needs to be non-zero for the below heuristic
-#and as long as Multi GB models dwarf this it will approximate OOM scoring OK
-RAM_CACHE_DEFAULT_RAM_USAGE = 0.1
+#Small baseline weight used when a cache entry has no measurable CPU tensors.
+#Keeps unknown-sized entries in eviction scoring without dominating tensor-backed entries.
+RAM_CACHE_DEFAULT_RAM_USAGE = 0.05
 #Exponential bias towards evicting older workflows so garbage will be taken out
 #in constantly changing setups.
@@ -545,11 +545,7 @@ class RAMPressureCache(LRUCache):
                 if isinstance(output, list):
                     scan_list_for_ram_usage(output)
                 elif isinstance(output, torch.Tensor) and output.device.type == 'cpu':
-                    #score Tensors at a 50% discount for RAM usage as they are likely to
-                    #be high value intermediates
-                    ram_usage += (output.numel() * output.element_size()) * 0.5
-                elif hasattr(output, "get_ram_usage"):
-                    ram_usage += output.get_ram_usage()
+                    ram_usage += output.numel() * output.element_size()
 
         scan_list_for_ram_usage(outputs)
         oom_score *= ram_usage
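
Taken together, the two hunks above leave the RAM-pressure scorer counting only the bytes of CPU tensors, at full price: the 50% "high value intermediate" discount and the get_ram_usage() hook are both gone, and model RAM accounting moves wholesale to the DynamicVRAM system. A minimal sketch of the post-commit scoring, assuming only torch; estimate_ram_usage, the placement of the default fallback, and the flat oom_score factor are illustrative guesses, not the repository's actual API:

import torch

RAM_CACHE_DEFAULT_RAM_USAGE = 0.05  # floor for entries with no measurable CPU tensors

def estimate_ram_usage(outputs):
    # Sum the byte size of every CPU tensor reachable through nested lists.
    # With the get_ram_usage() branch gone, models and CLIP/VAE wrappers held
    # in the cache contribute nothing here; their RAM pressure is tracked by
    # DynamicVRAM instead.
    ram_usage = 0.0

    def scan_list_for_ram_usage(items):
        nonlocal ram_usage
        for output in items:
            if isinstance(output, list):
                scan_list_for_ram_usage(output)
            elif isinstance(output, torch.Tensor) and output.device.type == 'cpu':
                ram_usage += output.numel() * output.element_size()

    scan_list_for_ram_usage(outputs)
    # Fall back to the small default so unknown-sized entries still take part
    # in eviction scoring without dominating tensor-backed ones (assumed
    # placement of the fallback, inferred from the comment in the diff above).
    return ram_usage or RAM_CACHE_DEFAULT_RAM_USAGE

# A 1024x1024 float32 tensor scores 4 MiB; a non-tensor output scores nothing.
outputs = [[torch.zeros(1024, 1024)], ["not a tensor"]]
oom_score = 1.0  # stand-in for the age/LRU factor the real cache multiplies in
oom_score *= estimate_ram_usage(outputs)
print(oom_score)  # 4194304.0

Larger CPU-resident outputs thus earn proportionally larger OOM scores and are evicted first under RAM pressure, with no special cases for model-bearing cache entries.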