dynamicVRAM + --cache-ram 2 (CORE-117) (#13603 )

* pinned_memory: remove JIT RAM pressure release. This doesn't work, as freeing intermediates for pins needs to be higher-priority than freeing pins-for-pins if and when you are going to do that. So this is too late, as pins-for-pins happens at model load time and we don't have JIT pins-for-pins. * caching: Add a filter to only free intermediates from inactive workflows. This is to get priorities in amongst pins straight. * mm: free inactive-ram from RAM cache first. Stuff from inactive workflows should be freed before anything else. * caching: purge old ModelPatchers first. Don't try to score them; just dump them at the first sign of trouble if they aren't part of the workflow.
Use pyav to load images instead of pillow. (#13594 )
2026-05-24 16:07:30 +08:00 · 2026-04-28 19:15:02 -04:00 · 2026-04-28 18:15:06 -04:00 · 2026-04-28 17:59:55 -04:00 · 2026-04-28 16:34:37 -04:00 · 2026-04-28 16:27:42 -04:00
7 changed files with 19 additions and 11 deletions
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@ -663,6 +663,7 @@ def minimum_inference_memory():

 def free_memory(memory_required, device, keep_loaded=[], for_dynamic=False, pins_required=0, ram_required=0):
    cleanup_models_gc()
+    comfy.memory_management.extra_ram_release(max(pins_required, ram_required))
    unloaded_model = []
    can_unload = []
    unloaded_models = []
--- a/comfy/pinned_memory.py
+++ b/comfy/pinned_memory.py
@ -2,7 +2,6 @@ import comfy.model_management
 import comfy.memory_management
 import comfy_aimdo.host_buffer
 import comfy_aimdo.torch
-import psutil

 from comfy.cli_args import args

@ -12,11 +11,6 @@ def get_pin(module):
 def pin_memory(module):
    if module.pin_failed or args.disable_pinned_memory or get_pin(module) is not None:
        return
-    #FIXME: This is a RAM cache trigger event
-    ram_headroom = comfy.memory_management.RAM_CACHE_HEADROOM
-    #we split the difference and assume half the RAM cache headroom is for us
-    if ram_headroom > 0 and psutil.virtual_memory().available < (ram_headroom * 0.5):
-        comfy.memory_management.extra_ram_release(ram_headroom)

    size = comfy.memory_management.vram_aligned_size([ module.weight, module.bias ])

--- a/comfy_api/latest/_input_impl/video_types.py
+++ b/comfy_api/latest/_input_impl/video_types.py
@ -284,13 +284,16 @@ class VideoFromFile(VideoInput):

                        if not checked_alpha:
                            for comp in frame.format.components:
-                                if comp.is_alpha:
+                                if comp.is_alpha or frame.format.name == "pal8":
                                    alphas = []
                                    image_format = 'gbrapf32le'
                                    break
                            checked_alpha = True

                        img = frame.to_ndarray(format=image_format)  # shape: (H, W, 4)
+                        if frame.rotation != 0:
+                            k = int(round(frame.rotation // 90))
+                            img = np.rot90(img, k=k, axes=(0, 1)).copy()
                        if alphas is None:
                            frames.append(torch.from_numpy(img))
                        else:
--- a/comfy_execution/caching.py
+++ b/comfy_execution/caching.py
@ -5,6 +5,7 @@ import psutil
 import time
 import torch
 from typing import Sequence, Mapping, Dict
+from comfy.model_patcher import ModelPatcher
 from comfy_execution.graph import DynamicPrompt
 from abc import ABC, abstractmethod

@ -523,13 +524,15 @@ class RAMPressureCache(LRUCache):
        self.timestamps[self.cache_key_set.get_data_key(node_id)] = time.time()
        super().set_local(node_id, value)

-    def ram_release(self, target):
+    def ram_release(self, target, free_active=False):
        if psutil.virtual_memory().available >= target:
            return

        clean_list = []

        for key, cache_entry in self.cache.items():
+            if not free_active and self.used_generation[key] == self.generation:
+                continue
            oom_score =  RAM_CACHE_OLD_WORKFLOW_OOM_MULTIPLIER ** (self.generation - self.used_generation[key])

            ram_usage = RAM_CACHE_DEFAULT_RAM_USAGE
@ -542,6 +545,9 @@ class RAMPressureCache(LRUCache):
                        scan_list_for_ram_usage(output)
                    elif isinstance(output, torch.Tensor) and output.device.type == 'cpu':
                        ram_usage += output.numel() * output.element_size()
+                    elif isinstance(output, ModelPatcher) and self.used_generation[key] != self.generation:
+                        #old ModelPatchers are the first to go
+                        ram_usage = 1e30
            scan_list_for_ram_usage(cache_entry.outputs)

            oom_score *= ram_usage
--- a/execution.py
+++ b/execution.py
@ -779,7 +779,7 @@ class PromptExecutor:

                    if self.cache_type == CacheType.RAM_PRESSURE:
                        comfy.model_management.free_memory(0, None, pins_required=ram_headroom, ram_required=ram_headroom)
-                        comfy.memory_management.extra_ram_release(ram_headroom)
+                        ram_release_callback(ram_headroom, free_active=True)
                else:
                    # Only execute when the while-loop ends without break
                    # Send cached UI for intermediate output nodes that weren't executed
--- a/nodes.py
+++ b/nodes.py
@ -32,7 +32,7 @@ import comfy.controlnet
 from comfy.comfy_types import IO, ComfyNodeABC, InputTypeDict, FileLocator
 from comfy_api.internal import register_versions, ComfyAPIWithVersion
 from comfy_api.version_list import supported_versions
-from comfy_api.latest import io, ComfyExtension
+from comfy_api.latest import io, ComfyExtension, InputImpl

 import comfy.clip_vision

@ -1716,6 +1716,10 @@ class LoadImage:
    def load_image(self, image):
        image_path = folder_paths.get_annotated_filepath(image)

+        components = InputImpl.VideoFromFile(image_path).get_components()
+        if components.images.shape[0] > 0:
+            return (components.images, 1.0 - components.alpha[..., -1] if components.alpha is not None else torch.zeros((components.images.shape[0], 64, 64), dtype=torch.float32, device="cpu"))
+
        img = node_helpers.pillow(Image.open, image_path)

        output_images = []
--- a/requirements.txt
+++ b/requirements.txt
@ -23,7 +23,7 @@ SQLAlchemy>=2.0
 filelock
 av>=14.2.0
 comfy-kitchen>=0.2.8
-comfy-aimdo==0.2.14
+comfy-aimdo==0.3.0
 requests
 simpleeval>=1.0.0
 blake3
Author	SHA1	Message	Date
rattus	fce0398470	dynamicVRAM + --cache-ram 2 (CORE-117) (#13603 ) Some checks are pending Python Linting / Run Ruff (push) Waiting to run Details Python Linting / Run Pylint (push) Waiting to run Details Build package / Build Test (3.10) (push) Waiting to run Details Build package / Build Test (3.11) (push) Waiting to run Details Build package / Build Test (3.12) (push) Waiting to run Details Build package / Build Test (3.13) (push) Waiting to run Details Build package / Build Test (3.14) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Waiting to run Details Execution Tests / test (macos-latest) (push) Waiting to run Details Execution Tests / test (ubuntu-latest) (push) Waiting to run Details Execution Tests / test (windows-latest) (push) Waiting to run Details Test server launches without errors / test (push) Waiting to run Details Unit Tests / test (macos-latest) (push) Waiting to run Details Unit Tests / test (ubuntu-latest) (push) Waiting to run Details Unit Tests / test (windows-2022) (push) Waiting to run Details * pinned_memory: remove JIT RAM pressure release. This doesn't work, as freeing intermediates for pins needs to be higher-priority than freeing pins-for-pins if and when you are going to do that. So this is too late, as pins-for-pins happens at model load time and we don't have JIT pins-for-pins. * caching: Add a filter to only free intermediates from inactive workflows. This is to get priorities in amongst pins straight. 
* mm: free inactive-ram from RAM cache first. Stuff from inactive workflows should be freed before anything else. * caching: purge old ModelPatchers first. Don't try to score them; just dump them at the first sign of trouble if they aren't part of the workflow.	2026-04-28 19:15:02 -04:00
comfyanonymous	dae3d34751	Use pyav to load images instead of pillow. (#13594 ) On failure (e.g. animated WebP files), fall back to the old Pillow code. This should fix the extra precision in high bit depth images (like 16 bit PNG) being discarded when loaded by Pillow and potentially add support for more image formats.	2026-04-28 18:15:06 -04:00
comfyanonymous	c7a517c2f9	Make pyav loading code handle tRNS PNG. (#13607 )	2026-04-28 17:59:55 -04:00
rattus	e514119e1e	comfy-aimdo v0.3.0 (#13604 ) Comfy-aimdo 0.3.0 contains several major new features: multi-GPU support, ARM support, and AMD support. Refactorings include: Linkless architecture - linkage is now performed purely at runtime to stop host library lookups completely and only interact with the torch-loaded Nvidia stack. Elimination of cudart integration on Linux. It's now consistent with Windows. Misc bugfixes and minor features.	2026-04-28 16:34:37 -04:00
comfyanonymous	13519934ba	Handle metadata rotation in pyav code. (#13605 )	2026-04-28 16:27:42 -04:00