mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-05-01 04:42:31 +08:00
dynamicVRAM + --cache-ram 2 (CORE-117) (#13603)
Some checks are pending
Python Linting / Run Ruff (push) Waiting to run
Python Linting / Run Pylint (push) Waiting to run
Build package / Build Test (3.10) (push) Waiting to run
Build package / Build Test (3.11) (push) Waiting to run
Build package / Build Test (3.12) (push) Waiting to run
Build package / Build Test (3.13) (push) Waiting to run
Build package / Build Test (3.14) (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Waiting to run
Execution Tests / test (macos-latest) (push) Waiting to run
Execution Tests / test (ubuntu-latest) (push) Waiting to run
Execution Tests / test (windows-latest) (push) Waiting to run
Test server launches without errors / test (push) Waiting to run
Unit Tests / test (macos-latest) (push) Waiting to run
Unit Tests / test (ubuntu-latest) (push) Waiting to run
Unit Tests / test (windows-2022) (push) Waiting to run
Some checks are pending
Python Linting / Run Ruff (push) Waiting to run
Python Linting / Run Pylint (push) Waiting to run
Build package / Build Test (3.10) (push) Waiting to run
Build package / Build Test (3.11) (push) Waiting to run
Build package / Build Test (3.12) (push) Waiting to run
Build package / Build Test (3.13) (push) Waiting to run
Build package / Build Test (3.14) (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Waiting to run
Execution Tests / test (macos-latest) (push) Waiting to run
Execution Tests / test (ubuntu-latest) (push) Waiting to run
Execution Tests / test (windows-latest) (push) Waiting to run
Test server launches without errors / test (push) Waiting to run
Unit Tests / test (macos-latest) (push) Waiting to run
Unit Tests / test (ubuntu-latest) (push) Waiting to run
Unit Tests / test (windows-2022) (push) Waiting to run
* pinned_memory: remove JIT RAM pressure release This doesn't work, as freeing intermediates for pins needs to be higher-priority than freeing pins-for-pins if and when you are going to do that. So this is too late, as pins-for-pins happens at model load time and we don't have JIT pins-for-pins. * caching: Add a filter to only free intermediates from inactive workflows This is to get priorities amongst pins straight. * mm: free inactive RAM from the RAM cache first Stuff from inactive workflows should be freed before anything else. * caching: purge old ModelPatchers first Don't try to score them; just dump them at the first sign of trouble if they aren't part of the current workflow.
This commit is contained in:
parent
dae3d34751
commit
fce0398470
@ -663,6 +663,7 @@ def minimum_inference_memory():
|
|||||||
|
|
||||||
def free_memory(memory_required, device, keep_loaded=[], for_dynamic=False, pins_required=0, ram_required=0):
|
def free_memory(memory_required, device, keep_loaded=[], for_dynamic=False, pins_required=0, ram_required=0):
|
||||||
cleanup_models_gc()
|
cleanup_models_gc()
|
||||||
|
comfy.memory_management.extra_ram_release(max(pins_required, ram_required))
|
||||||
unloaded_model = []
|
unloaded_model = []
|
||||||
can_unload = []
|
can_unload = []
|
||||||
unloaded_models = []
|
unloaded_models = []
|
||||||
|
|||||||
@ -2,7 +2,6 @@ import comfy.model_management
|
|||||||
import comfy.memory_management
|
import comfy.memory_management
|
||||||
import comfy_aimdo.host_buffer
|
import comfy_aimdo.host_buffer
|
||||||
import comfy_aimdo.torch
|
import comfy_aimdo.torch
|
||||||
import psutil
|
|
||||||
|
|
||||||
from comfy.cli_args import args
|
from comfy.cli_args import args
|
||||||
|
|
||||||
@ -12,11 +11,6 @@ def get_pin(module):
|
|||||||
def pin_memory(module):
|
def pin_memory(module):
|
||||||
if module.pin_failed or args.disable_pinned_memory or get_pin(module) is not None:
|
if module.pin_failed or args.disable_pinned_memory or get_pin(module) is not None:
|
||||||
return
|
return
|
||||||
#FIXME: This is a RAM cache trigger event
|
|
||||||
ram_headroom = comfy.memory_management.RAM_CACHE_HEADROOM
|
|
||||||
#we split the difference and assume half the RAM cache headroom is for us
|
|
||||||
if ram_headroom > 0 and psutil.virtual_memory().available < (ram_headroom * 0.5):
|
|
||||||
comfy.memory_management.extra_ram_release(ram_headroom)
|
|
||||||
|
|
||||||
size = comfy.memory_management.vram_aligned_size([ module.weight, module.bias ])
|
size = comfy.memory_management.vram_aligned_size([ module.weight, module.bias ])
|
||||||
|
|
||||||
|
|||||||
@ -5,6 +5,7 @@ import psutil
|
|||||||
import time
|
import time
|
||||||
import torch
|
import torch
|
||||||
from typing import Sequence, Mapping, Dict
|
from typing import Sequence, Mapping, Dict
|
||||||
|
from comfy.model_patcher import ModelPatcher
|
||||||
from comfy_execution.graph import DynamicPrompt
|
from comfy_execution.graph import DynamicPrompt
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
|
|
||||||
@ -523,13 +524,15 @@ class RAMPressureCache(LRUCache):
|
|||||||
self.timestamps[self.cache_key_set.get_data_key(node_id)] = time.time()
|
self.timestamps[self.cache_key_set.get_data_key(node_id)] = time.time()
|
||||||
super().set_local(node_id, value)
|
super().set_local(node_id, value)
|
||||||
|
|
||||||
def ram_release(self, target):
|
def ram_release(self, target, free_active=False):
|
||||||
if psutil.virtual_memory().available >= target:
|
if psutil.virtual_memory().available >= target:
|
||||||
return
|
return
|
||||||
|
|
||||||
clean_list = []
|
clean_list = []
|
||||||
|
|
||||||
for key, cache_entry in self.cache.items():
|
for key, cache_entry in self.cache.items():
|
||||||
|
if not free_active and self.used_generation[key] == self.generation:
|
||||||
|
continue
|
||||||
oom_score = RAM_CACHE_OLD_WORKFLOW_OOM_MULTIPLIER ** (self.generation - self.used_generation[key])
|
oom_score = RAM_CACHE_OLD_WORKFLOW_OOM_MULTIPLIER ** (self.generation - self.used_generation[key])
|
||||||
|
|
||||||
ram_usage = RAM_CACHE_DEFAULT_RAM_USAGE
|
ram_usage = RAM_CACHE_DEFAULT_RAM_USAGE
|
||||||
@ -542,6 +545,9 @@ class RAMPressureCache(LRUCache):
|
|||||||
scan_list_for_ram_usage(output)
|
scan_list_for_ram_usage(output)
|
||||||
elif isinstance(output, torch.Tensor) and output.device.type == 'cpu':
|
elif isinstance(output, torch.Tensor) and output.device.type == 'cpu':
|
||||||
ram_usage += output.numel() * output.element_size()
|
ram_usage += output.numel() * output.element_size()
|
||||||
|
elif isinstance(output, ModelPatcher) and self.used_generation[key] != self.generation:
|
||||||
|
#old ModelPatchers are the first to go
|
||||||
|
ram_usage = 1e30
|
||||||
scan_list_for_ram_usage(cache_entry.outputs)
|
scan_list_for_ram_usage(cache_entry.outputs)
|
||||||
|
|
||||||
oom_score *= ram_usage
|
oom_score *= ram_usage
|
||||||
|
|||||||
@ -779,7 +779,7 @@ class PromptExecutor:
|
|||||||
|
|
||||||
if self.cache_type == CacheType.RAM_PRESSURE:
|
if self.cache_type == CacheType.RAM_PRESSURE:
|
||||||
comfy.model_management.free_memory(0, None, pins_required=ram_headroom, ram_required=ram_headroom)
|
comfy.model_management.free_memory(0, None, pins_required=ram_headroom, ram_required=ram_headroom)
|
||||||
comfy.memory_management.extra_ram_release(ram_headroom)
|
ram_release_callback(ram_headroom, free_active=True)
|
||||||
else:
|
else:
|
||||||
# Only execute when the while-loop ends without break
|
# Only execute when the while-loop ends without break
|
||||||
# Send cached UI for intermediate output nodes that weren't executed
|
# Send cached UI for intermediate output nodes that weren't executed
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user