Merge pull request #9 from rattus128/dev/threaded-loader-2-ram-cache
threaded loader 2 + ram cache (CORE-43,CORE-117)
commit ea5775c620
@@ -110,13 +110,11 @@ parser.add_argument("--preview-method", type=LatentPreviewMethod, default=Latent
 parser.add_argument("--preview-size", type=int, default=512, help="Sets the maximum preview size for sampler nodes.")

-CACHE_RAM_AUTO_GB = -1.0
-
 cache_group = parser.add_mutually_exclusive_group()
+cache_group.add_argument("--cache-ram", nargs='*', type=float, default=[], metavar="GB", help="Use RAM pressure caching with the specified headroom thresholds. This is the default caching mode. The first value sets the active-cache threshold; the optional second value sets the inactive-cache/pin threshold. Defaults when no values are provided: active 25%% of system RAM (min 4GB, max 32GB), inactive 75%% of system RAM (min 12GB, max 96GB).")
 cache_group.add_argument("--cache-classic", action="store_true", help="Use the old style (aggressive) caching.")
 cache_group.add_argument("--cache-lru", type=int, default=0, help="Use LRU caching with a maximum of N node results cached. May use more RAM/VRAM.")
 cache_group.add_argument("--cache-none", action="store_true", help="Reduced RAM/VRAM usage at the expense of executing every node for each run.")
-cache_group.add_argument("--cache-ram", nargs='?', const=CACHE_RAM_AUTO_GB, type=float, default=0, help="Use RAM pressure caching with the specified headroom threshold. If available RAM drops below the threshold the cache removes large items to free RAM. Default (when no value is provided): 25%% of system RAM (min 4GB, max 32GB).")

 attn_group = parser.add_mutually_exclusive_group()
 attn_group.add_argument("--use-split-cross-attention", action="store_true", help="Use the split cross attention optimization. Ignored when xformers is used.")
@@ -246,6 +244,9 @@ if comfy.options.args_parsing:
 else:
     args = parser.parse_args([])

+if args.cache_ram is not None and len(args.cache_ram) > 2:
+    parser.error("--cache-ram accepts at most two values: active GB and inactive GB")
+
 if args.windows_standalone_build:
     args.auto_launch = True
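For reference, a minimal standalone sketch of how the reworked --cache-ram flag parses with nargs='*' (argument names copied from the hunk above; the example invocations are hypothetical):

import argparse

parser = argparse.ArgumentParser()
cache_group = parser.add_mutually_exclusive_group()
cache_group.add_argument("--cache-ram", nargs='*', type=float, default=[], metavar="GB")
cache_group.add_argument("--cache-classic", action="store_true")

print(parser.parse_args([]).cache_ram)                          # [] -> auto defaults for both thresholds
print(parser.parse_args(["--cache-ram"]).cache_ram)             # [] -> also auto defaults
print(parser.parse_args(["--cache-ram", "8"]).cache_ram)        # [8.0] -> active threshold only
print(parser.parse_args(["--cache-ram", "8", "24"]).cache_ram)  # [8.0, 24.0] -> active + inactive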
@@ -498,7 +498,11 @@ current_loaded_models = []
 DIRTY_MMAPS = set()

-PIN_PRESSURE_HYSTERESIS = 128 * 1024 * 1024
+PIN_PRESSURE_HYSTERESIS = 256 * 1024 * 1024
+
+#Freeing registerables on pressure does imply a GPU sync, so go big on
+#the hysteresis so each expensive sync gives us back a good chunk.
+REGISTERABLE_PIN_HYSTERESIS = 768 * 1024 * 1024

 def module_size(module):
     module_mem = 0
@@ -525,15 +529,28 @@ def free_pins(size, evict_active=False):
             break

 def ensure_pin_budget(size, evict_active=False):
-    if MAX_PINNED_MEMORY <= 0:
+    if MAX_MODEL_MEMORY <= 0:
         return

-    shortfall = TOTAL_PINNED_MEMORY + size - MAX_PINNED_MEMORY
+    shortfall = TOTAL_MODEL_MEMORY + size - MAX_MODEL_MEMORY
     if shortfall <= 0:
         return

     free_pins(shortfall + PIN_PRESSURE_HYSTERESIS, evict_active=evict_active)

+def ensure_pin_registerable(size, evict_active=False):
+    shortfall = TOTAL_PINNED_MEMORY + size - MAX_PINNED_MEMORY
+    if MAX_PINNED_MEMORY <= 0 or shortfall <= 0:
+        return
+
+    shortfall += REGISTERABLE_PIN_HYSTERESIS
+    for loaded_model in reversed(current_loaded_models):
+        model = loaded_model.model
+        if model is not None and model.is_dynamic() and (evict_active or not model.model.dynamic_pins[model.load_device]["active"]):
+            shortfall -= model.unregister_inactive_pins(shortfall)
+            if shortfall <= 0:
+                return
+
 class LoadedModel:
     def __init__(self, model):
         self._set_model(model)
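Reading the new globals together: ensure_pin_budget now guards how much host RAM is allocated for model staging buffers (TOTAL_MODEL_MEMORY against MAX_MODEL_MEMORY), while the new ensure_pin_registerable guards how much of that memory may remain page-locked at once (TOTAL_PINNED_MEMORY against MAX_PINNED_MEMORY). A rough toy sketch of the two checks, with made-up limits and simplified logic:

GiB = 1024**3

TOTAL_MODEL_MEMORY = 0      # bytes allocated in host staging buffers
TOTAL_PINNED_MEMORY = 0     # bytes of those buffers currently cudaHostRegister'd
MAX_MODEL_MEMORY = 48 * GiB     # hypothetical limits; see the initialisation hunk below
MAX_PINNED_MEMORY = 24 * GiB

def over_alloc_budget(size):
    # Pressure here frees whole pinned buffers (plus PIN_PRESSURE_HYSTERESIS).
    return TOTAL_MODEL_MEMORY + size - MAX_MODEL_MEMORY > 0

def over_register_budget(size):
    # Pressure here only unregisters pins, keeping the RAM copy (plus the larger
    # REGISTERABLE_PIN_HYSTERESIS, since each pass implies a GPU sync).
    return TOTAL_PINNED_MEMORY + size - MAX_PINNED_MEMORY > 0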
@@ -1208,22 +1225,24 @@ def get_pin_buffer(offload_stream):
     return pin_buffer

 def resize_pin_buffer(pin_buffer, size):
-    global TOTAL_PINNED_MEMORY
+    global TOTAL_MODEL_MEMORY, TOTAL_PINNED_MEMORY
     old_size = pin_buffer.size
     if size <= old_size:
         return True

     growth = size - old_size
     comfy.memory_management.extra_ram_release(comfy.memory_management.RAM_CACHE_HEADROOM, free_active=True)
     ensure_pin_budget(growth, evict_active=True)
+    ensure_pin_registerable(growth, evict_active=True)
     try:
         pin_buffer.extend(size=size, reallocate=True)
     except RuntimeError:
         return False
+    TOTAL_MODEL_MEMORY += pin_buffer.size - old_size
     TOTAL_PINNED_MEMORY += pin_buffer.size - old_size
     return True

 def reset_cast_buffers():
-    global TOTAL_PINNED_MEMORY
+    global TOTAL_MODEL_MEMORY, TOTAL_PINNED_MEMORY
     global LARGEST_CASTED_WEIGHT
     global LARGEST_AIMDO_CASTED_WEIGHT
@@ -1239,16 +1258,17 @@ def reset_cast_buffers():
     DIRTY_MMAPS.clear()

     for pin_buffer in STREAM_PIN_BUFFERS.values():
+        TOTAL_MODEL_MEMORY -= pin_buffer.size
         TOTAL_PINNED_MEMORY -= pin_buffer.size
-    if TOTAL_PINNED_MEMORY < 0:
-        TOTAL_PINNED_MEMORY = 0
+    TOTAL_MODEL_MEMORY = max(0, TOTAL_MODEL_MEMORY)
+    TOTAL_PINNED_MEMORY = max(0, TOTAL_PINNED_MEMORY)

     for loaded_model in current_loaded_models:
         model = loaded_model.model
         if model is not None and model.is_dynamic():
             model.model.dynamic_pins[model.load_device]["active"] = False
             model.partially_unload_ram(1e30, subsets=[ "patches" ])
-            model.model.dynamic_pins[model.load_device]["patches"] = (comfy_aimdo.host_buffer.HostBuffer(0, 8 * 1024 * 1024), [])
+            model.model.dynamic_pins[model.load_device]["patches"] = (comfy_aimdo.host_buffer.HostBuffer(0, 8 * 1024 * 1024), [], [-1])

     STREAM_CAST_BUFFERS.clear()
     STREAM_AIMDO_CAST_BUFFERS.clear()
@@ -1352,14 +1372,18 @@ def cast_to_device(tensor, device, dtype, copy=False):


 PINNED_MEMORY = {}
+TOTAL_MODEL_MEMORY = 0
 TOTAL_PINNED_MEMORY = 0
+MAX_MODEL_MEMORY = -1
 MAX_PINNED_MEMORY = -1
 if not args.disable_pinned_memory:
     if is_nvidia() or is_amd():
+        ram = get_total_memory(torch.device("cpu"))
+        MAX_MODEL_MEMORY = min(ram - 4 * 1024 * 1024 * 1024, ram * 0.90)
         if WINDOWS:
-            MAX_PINNED_MEMORY = get_total_memory(torch.device("cpu")) * 0.40 # Windows limit is apparently 50%
+            MAX_PINNED_MEMORY = ram * 0.40 # Windows limit is apparently 50%
         else:
-            MAX_PINNED_MEMORY = get_total_memory(torch.device("cpu")) * 0.90
+            MAX_PINNED_MEMORY = ram * 0.90
         logging.info("Enabled pinned memory {}".format(MAX_PINNED_MEMORY // (1024 * 1024)))

 PINNING_ALLOWED_TYPES = set(["Tensor", "Parameter", "QuantizedTensor"])
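Worked numbers for the limits above, assuming a hypothetical 64GB machine (the constants come from the hunk; the figures are illustrative only):

GiB = 1024**3
ram = 64 * GiB                                       # hypothetical total system RAM

MAX_MODEL_MEMORY = min(ram - 4 * GiB, ram * 0.90)    # min(60 GiB, 57.6 GiB) = 57.6 GiB
MAX_PINNED_MEMORY_WINDOWS = ram * 0.40               # 25.6 GiB (Windows pin limit is ~50%)
MAX_PINNED_MEMORY_OTHER = ram * 0.90                 # 57.6 GiB
print(MAX_MODEL_MEMORY / GiB, MAX_PINNED_MEMORY_WINDOWS / GiB, MAX_PINNED_MEMORY_OTHER / GiB)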
@@ -1396,7 +1420,7 @@ def pin_memory(tensor):

     size = tensor.nbytes
     comfy.memory_management.extra_ram_release(comfy.memory_management.RAM_CACHE_HEADROOM)
-    ensure_pin_budget(size)
+    ensure_pin_registerable(size)

     ptr = tensor.data_ptr()
     if ptr == 0:
@@ -1433,7 +1457,8 @@ def unpin_memory(tensor):
         return False

     if torch.cuda.cudart().cudaHostUnregister(ptr) == 0:
-        TOTAL_PINNED_MEMORY -= PINNED_MEMORY.pop(ptr)
+        size = PINNED_MEMORY.pop(ptr)
+        TOTAL_PINNED_MEMORY -= size
         return True
     else:
         logging.warning("Unpin error.")
@@ -1557,8 +1557,8 @@ class ModelPatcherDynamic(ModelPatcher):
             self.model.dynamic_pins = {}
         if self.load_device not in self.model.dynamic_pins:
             self.model.dynamic_pins[self.load_device] = {
-                "weights": (comfy_aimdo.host_buffer.HostBuffer(0, 64 * 1024 * 1024), []),
-                "patches": (comfy_aimdo.host_buffer.HostBuffer(0, 8 * 1024 * 1024), []),
+                "weights": (comfy_aimdo.host_buffer.HostBuffer(0, 64 * 1024 * 1024), [], [-1]),
+                "patches": (comfy_aimdo.host_buffer.HostBuffer(0, 8 * 1024 * 1024), [], [-1]),
                 "failed": False,
                 "active": False,
             }
@@ -1761,19 +1761,44 @@ class ModelPatcherDynamic(ModelPatcher):
         return (self.model.dynamic_pins[self.load_device]["weights"][0].size +
                 self.model.dynamic_pins[self.load_device]["patches"][0].size)

+    def unregister_inactive_pins(self, ram_to_unload, subsets=[ "weights", "patches" ]):
+        freed = 0
+        pin_state = self.model.dynamic_pins[self.load_device]
+        for subset in subsets:
+            hostbuf, stack, stack_split = pin_state[subset]
+            split = stack_split[0]
+            while split >= 0:
+                module, offset = stack[split]
+                split -= 1
+                stack_split[0] = split
+                if not module._pin_registered:
+                    continue
+                size = module._pin.numel() * module._pin.element_size()
+                if torch.cuda.cudart().cudaHostUnregister(module._pin.data_ptr()) != 0:
+                    comfy.model_management.discard_cuda_async_error()
+                    continue
+                module._pin_registered = False
+                comfy.model_management.TOTAL_PINNED_MEMORY = max(0, comfy.model_management.TOTAL_PINNED_MEMORY - size)
+                freed += size
+                ram_to_unload -= size
+                if ram_to_unload <= 0:
+                    return freed
+        return freed
+
     def partially_unload_ram(self, ram_to_unload, subsets=[ "weights", "patches" ]):
         freed = 0
         pin_state = self.model.dynamic_pins[self.load_device]
         for subset in subsets:
-            hostbuf, stack = pin_state[subset]
+            hostbuf, stack, stack_split = pin_state[subset]
             while len(stack) > 0:
                 module, offset = stack.pop()
                 size = module._pin.numel() * module._pin.element_size()
                 del module._pin
-                hostbuf.truncate(offset)
-                comfy.model_management.TOTAL_PINNED_MEMORY -= size
-                if comfy.model_management.TOTAL_PINNED_MEMORY < 0:
-                    comfy.model_management.TOTAL_PINNED_MEMORY = 0
+                hostbuf.truncate(offset, do_unregister=module._pin_registered)
+                stack_split[0] = min(stack_split[0], len(stack) - 1)
+                comfy.model_management.TOTAL_MODEL_MEMORY = max(0, comfy.model_management.TOTAL_MODEL_MEMORY - size)
+                if module._pin_registered:
+                    comfy.model_management.TOTAL_PINNED_MEMORY = max(0, comfy.model_management.TOTAL_PINNED_MEMORY - size)
                 freed += size
                 ram_to_unload -= size
                 if ram_to_unload <= 0:
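The [-1] added as a third element of each pin-state tuple looks like a mutable watermark: it records the highest stack index whose pin is currently registered, so unregister_inactive_pins above only walks the registered tail while partially_unload_ram clamps it as entries are popped. A simplified standalone sketch of that bookkeeping (the helper names here are hypothetical, not the actual API):

stack = []           # (module, offset) entries, in the order they were pinned
stack_split = [-1]   # index of the highest entry that is still page-locked

def note_registered(stack_index):
    # Called when a module's pin is (re)registered; raise the watermark.
    stack_split[0] = max(stack_split[0], stack_index)

def walk_registered_tail():
    # Yield registered entries newest-first, lowering the watermark as we go.
    split = stack_split[0]
    while split >= 0:
        module, offset = stack[split]
        split -= 1
        stack_split[0] = split
        yield module, offset

def note_popped():
    # Called when the newest entry is freed entirely; keep the watermark valid.
    stack.pop()
    stack_split[0] = min(stack_split[0], len(stack) - 1)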
@@ -2,6 +2,7 @@ import comfy.model_management
 import comfy.memory_management
 import comfy_aimdo.host_buffer
 import comfy_aimdo.torch
+import torch

 from comfy.cli_args import args
@@ -10,15 +11,33 @@ def get_pin(module, subset="weights"):

 def pin_memory(module, subset="weights", size=None):
     pin_state = module._pin_state
-    if pin_state["failed"] or args.disable_pinned_memory or get_pin(module, subset) is not None:
+    if pin_state["failed"] or args.disable_pinned_memory:
         return

-    hostbuf, stack = pin_state[subset]
+    hostbuf, stack, stack_split = pin_state[subset]
+    pin = get_pin(module, subset)
+    if pin is not None:
+        if module._pin_registered:
+            return
+
+        size = module._pin.nbytes
+        comfy.model_management.ensure_pin_registerable(size)
+
+        if torch.cuda.cudart().cudaHostRegister(module._pin.data_ptr(), size, 1) != 0:
+            comfy.model_management.discard_cuda_async_error()
+            return False
+        module._pin_registered = True
+        stack_split[0] = max(stack_split[0], module._pin_stack_index)
+        comfy.model_management.TOTAL_PINNED_MEMORY += size
+        return True
+
     if size is None:
         size = comfy.memory_management.vram_aligned_size([ module.weight, module.bias ])
     offset = hostbuf.size

     comfy.memory_management.extra_ram_release(comfy.memory_management.RAM_CACHE_HEADROOM)
     comfy.model_management.ensure_pin_budget(size)
+    comfy.model_management.ensure_pin_registerable(size)

     try:
         hostbuf.extend(size=size)
@@ -29,5 +48,9 @@ def pin_memory(module, subset="weights", size=None):
     module._pin = comfy_aimdo.torch.hostbuf_to_tensor(hostbuf)[offset:offset + size]
     module._pin.untyped_storage()._comfy_hostbuf = hostbuf
     stack.append((module, offset))
+    module._pin_registered = True
+    module._pin_stack_index = len(stack) - 1
+    stack_split[0] = max(stack_split[0], module._pin_stack_index)
+    comfy.model_management.TOTAL_MODEL_MEMORY += size
     comfy.model_management.TOTAL_PINNED_MEMORY += size
     return True
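The re-register branch above pins an existing host-buffer slice in place with cudaHostRegister rather than allocating a new one. A small hedged demo of that primitive on a plain CPU tensor (flag 1 matches the call in the diff); it only does anything on a CUDA build of PyTorch:

import torch

if torch.cuda.is_available():
    t = torch.empty(1024, 1024)                  # ordinary pageable CPU tensor
    nbytes = t.numel() * t.element_size()
    # Page-lock the existing allocation in place; cudart returns 0 on success.
    if torch.cuda.cudart().cudaHostRegister(t.data_ptr(), nbytes, 1) == 0:
        # ... memory is now pinned and can be transferred to the GPU asynchronously ...
        torch.cuda.cudart().cudaHostUnregister(t.data_ptr())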
@@ -728,6 +728,7 @@ class PromptExecutor:

         self._notify_prompt_lifecycle("start", prompt_id)
         ram_headroom = int(self.cache_args["ram"] * (1024 ** 3))
+        ram_inactive_headroom = int(self.cache_args["ram_inactive"] * (1024 ** 3))
         ram_release_callback = self.caches.outputs.ram_release if self.cache_type == CacheType.RAM_PRESSURE else None
         comfy.memory_management.set_ram_cache_release_state(ram_release_callback, ram_headroom)
@@ -781,7 +782,7 @@ class PromptExecutor:
             execution_list.complete_node_execution()

             if self.cache_type == CacheType.RAM_PRESSURE:
-                ram_release_callback(ram_headroom)
+                ram_release_callback(ram_inactive_headroom)
                 ram_shortfall = ram_headroom - psutil.virtual_memory().available
                 comfy.model_management.free_pins(ram_shortfall)
                 ram_release_callback(ram_headroom, free_active=True)
main.py
@@ -283,19 +283,25 @@ def _collect_output_absolute_paths(history_result: dict) -> list[str]:

 def prompt_worker(q, server_instance):
     current_time: float = 0.0
-    cache_ram = args.cache_ram
-    if cache_ram < 0:
+    cache_ram = 0
+    cache_ram_inactive = 0
+    if not args.cache_classic and not args.cache_none and args.cache_lru <= 0:
         cache_ram = min(32.0, max(4.0, comfy.model_management.total_ram * 0.25 / 1024.0))
+        cache_ram_inactive = min(96.0, max(12.0, comfy.model_management.total_ram * 0.75 / 1024.0))
+    if len(args.cache_ram) > 0:
+        cache_ram = args.cache_ram[0]
+    if len(args.cache_ram) > 1:
+        cache_ram_inactive = args.cache_ram[1]

-    cache_type = execution.CacheType.CLASSIC
-    if args.cache_lru > 0:
+    cache_type = execution.CacheType.RAM_PRESSURE
+    if args.cache_classic:
+        cache_type = execution.CacheType.CLASSIC
+    elif args.cache_lru > 0:
         cache_type = execution.CacheType.LRU
-    elif cache_ram > 0:
-        cache_type = execution.CacheType.RAM_PRESSURE
     elif args.cache_none:
         cache_type = execution.CacheType.NONE

-    e = execution.PromptExecutor(server_instance, cache_type=cache_type, cache_args={ "lru" : args.cache_lru, "ram" : cache_ram } )
+    e = execution.PromptExecutor(server_instance, cache_type=cache_type, cache_args={ "lru" : args.cache_lru, "ram" : cache_ram, "ram_inactive" : cache_ram_inactive } )
     last_gc_collect = 0
     need_gc = False
     gc_collect_interval = 10.0
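Worked defaults for the auto path above, assuming a hypothetical machine with 32GB of RAM (total_ram appears to be tracked in MB in this code path, hence the extra division by 1024.0):

total_ram = 32 * 1024.0   # MB, hypothetical

cache_ram = min(32.0, max(4.0, total_ram * 0.25 / 1024.0))            # 8.0 GB active headroom
cache_ram_inactive = min(96.0, max(12.0, total_ram * 0.75 / 1024.0))  # 24.0 GB inactive headroom
print(cache_ram, cache_ram_inactive)                                  # 8.0 24.0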
@@ -23,7 +23,7 @@ SQLAlchemy>=2.0.0
 filelock
 av>=14.2.0
 comfy-kitchen>=0.2.8
-comfy-aimdo==0.4.0
+comfy-aimdo==0.4.1
 requests
 simpleeval>=1.0.0
 blake3