Compare commits

...

5 Commits

Author SHA1 Message Date
Christian Byrne
2ec859c1cf
Merge 562e355f97 into c05a08ae66
2026-02-02 19:09:40 -06:00
comfyanonymous
c05a08ae66
Add back function. (#12234)
2026-02-02 19:52:07 -05:00
rattus
de9ada6a41
Dynamic VRAM unloading fix (#12227)
* mp: fix full dynamic unloading

This was not unloading dynamic models when requesting a full unload via
the unpatch() code path.

This was OK if your workflow was all dynamic models, but it fails with big
VRAM leaks if you need to fully unload something for a regular ModelPatcher.

It also fixes the "unload models" button.

* mm: load models outside of Aimdo Mempool

In dynamic_vram mode, escape the Aimdo mempool and load into the regular
mempool. Use a dummy thread to do it.
2026-02-02 17:35:20 -05:00
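For illustration, a minimal sketch of the thread-escape trick this commit describes, written against PyTorch 2.5+'s public torch.cuda.MemPool / use_mem_pool API rather than ComfyUI's actual Aimdo pool: because mempool contexts are documented as thread-local, an allocation made from a fresh thread lands in the default caching-allocator pool and can outlive the pool context.

```python
import threading
import torch

def alloc_in_default_pool(shape, device, out):
    # Runs in a fresh thread, so it does not inherit the caller's
    # use_mem_pool() context (mempool contexts are thread-local):
    # this allocation goes to the default caching-allocator pool.
    out.append(torch.empty(shape, device=device))

pool = torch.cuda.MemPool()          # assumes PyTorch >= 2.5
with torch.cuda.use_mem_pool(pool):
    scratch = torch.empty(1 << 20, device="cuda")  # allocated inside `pool`

    out = []
    t = threading.Thread(target=alloc_in_default_pool,
                         args=((1 << 20,), "cuda", out))
    t.start()
    t.join()
    persistent = out[0]  # outside `pool`; safe to retain across nodes
```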
rattus
37f711d4a1
mm: Fix cast buffers with intel offloading (#12229)
Intel has offloading support but there were some nvidia calls in the
new cast buffer stuff.
2026-02-02 17:34:46 -05:00
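The fix pattern, sketched below: route backend-specific calls through small dispatch helpers instead of calling torch.cuda directly, so XPU offloading takes the xpu path. This is a hedged sketch, not ComfyUI's implementation; is_intel_xpu mirrors the helper name used in the diff, and the empty_cache analogue is illustrative (ComfyUI routes cache clears through soft_empty_cache()).

```python
import torch

def is_intel_xpu():
    # Assumed detection helper, mirroring the name used in ComfyUI.
    return hasattr(torch, "xpu") and torch.xpu.is_available()

def synchronize():
    # Backend dispatch instead of a hardcoded torch.cuda.synchronize().
    if is_intel_xpu():
        torch.xpu.synchronize()
    elif torch.cuda.is_available():
        torch.cuda.synchronize()

def empty_cache():
    # Illustrative analogue for cache clears.
    if is_intel_xpu():
        torch.xpu.empty_cache()
    elif torch.cuda.is_available():
        torch.cuda.empty_cache()
```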
bymyself
562e355f97
feat: add ResolutionSelector node for aspect ratio and megapixel-based resolution calculation
Amp-Thread-ID: https://ampcode.com/threads/T-019c179e-cd8c-768f-ae66-207c7a53c01d
2026-01-31 22:16:34 -08:00
4 changed files with 117 additions and 12 deletions

View File

@@ -19,7 +19,8 @@
 import psutil
 import logging
 from enum import Enum
-from comfy.cli_args import args, PerformanceFeature
+from comfy.cli_args import args, PerformanceFeature, enables_dynamic_vram
+import threading
 import torch
 import sys
 import platform
@@ -650,7 +651,7 @@ def free_memory(memory_required, device, keep_loaded=[], for_dynamic=False, ram_
     soft_empty_cache()
     return unloaded_models


-def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimum_memory_required=None, force_full_load=False):
+def load_models_gpu_orig(models, memory_required=0, force_patch_weights=False, minimum_memory_required=None, force_full_load=False):
     cleanup_models_gc()
     global vram_state
@@ -746,6 +747,26 @@ def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimu
             current_loaded_models.insert(0, loaded_model)
     return

+def load_models_gpu_thread(models, memory_required, force_patch_weights, minimum_memory_required, force_full_load):
+    with torch.inference_mode():
+        load_models_gpu_orig(models, memory_required, force_patch_weights, minimum_memory_required, force_full_load)
+    soft_empty_cache()
+
+def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimum_memory_required=None, force_full_load=False):
+    # Deliberately load models outside of the Aimdo mempool so they can be retained across
+    # nodes. Use a dummy thread to do it, as pytorch documents that mempool contexts are
+    # thread-local; exploit that to escape the context.
+    if enables_dynamic_vram():
+        t = threading.Thread(
+            target=load_models_gpu_thread,
+            args=(models, memory_required, force_patch_weights, minimum_memory_required, force_full_load)
+        )
+        t.start()
+        t.join()
+    else:
+        load_models_gpu_orig(models, memory_required=memory_required, force_patch_weights=force_patch_weights,
+                             minimum_memory_required=minimum_memory_required, force_full_load=force_full_load)
+
 def load_model_gpu(model):
     return load_models_gpu([model])
@@ -1112,11 +1133,11 @@ def get_cast_buffer(offload_stream, device, size, ref):
             return None
     if cast_buffer is not None and cast_buffer.numel() > 50 * (1024 ** 2):
         #I want my wrongly sized 50MB+ of VRAM back from the caching allocator right now
-        torch.cuda.synchronize()
+        synchronize()
         del STREAM_CAST_BUFFERS[offload_stream]
         del cast_buffer
         #FIXME: This doesn't work in Aimdo because mempool can't clear cache
-        torch.cuda.empty_cache()
+        soft_empty_cache()
     with wf_context:
         cast_buffer = torch.empty((size), dtype=torch.int8, device=device)
     STREAM_CAST_BUFFERS[offload_stream] = cast_buffer
@@ -1132,9 +1153,7 @@ def reset_cast_buffers():
     for offload_stream in STREAM_CAST_BUFFERS:
         offload_stream.synchronize()
     STREAM_CAST_BUFFERS.clear()
-    if comfy.memory_management.aimdo_allocator is None:
-        #Pytorch 2.7 and earlier crashes if you try and empty_cache when mempools exist
-        torch.cuda.empty_cache()
+    soft_empty_cache()

 def get_offload_stream(device):
     stream_counter = stream_counters.get(device, 0)
@@ -1284,7 +1303,7 @@ def discard_cuda_async_error():
         a = torch.tensor([1], dtype=torch.uint8, device=get_torch_device())
         b = torch.tensor([1], dtype=torch.uint8, device=get_torch_device())
         _ = a + b
-        torch.cuda.synchronize()
+        synchronize()
     except torch.AcceleratorError:
         #Dump it! We already know about it from the synchronous return
         pass
@@ -1688,6 +1707,12 @@ def lora_compute_dtype(device):
         LORA_COMPUTE_DTYPES[device] = dtype
     return dtype

+def synchronize():
+    if is_intel_xpu():
+        torch.xpu.synchronize()
+    elif torch.cuda.is_available():
+        torch.cuda.synchronize()
+
 def soft_empty_cache(force=False):
     global cpu_state
     if cpu_state == CPUState.MPS:
@@ -1713,9 +1738,6 @@ def debug_memory_summary():
         return torch.cuda.memory.memory_summary()
     return ""

-#TODO: might be cleaner to put this somewhere else
-import threading
-
 class InterruptProcessingException(Exception):
     pass

View File

@@ -1597,7 +1597,7 @@ class ModelPatcherDynamic(ModelPatcher):
         if unpatch_weights:
             self.partially_unload_ram(1e32)
-            self.partially_unload(None)
+            self.partially_unload(None, 1e32)

     def partially_load(self, device_to, extra_memory=0, force_patch_weights=False):
         assert not force_patch_weights #See above

View File

@@ -0,0 +1,82 @@
+from __future__ import annotations
+
+import math
+from enum import Enum
+
+from typing_extensions import override
+
+from comfy_api.latest import ComfyExtension, io
+
+
+class AspectRatio(str, Enum):
+    SQUARE = "1:1 (Square)"
+    PHOTO_H = "3:2 (Photo)"
+    STANDARD_H = "4:3 (Standard)"
+    WIDESCREEN_H = "16:9 (Widescreen)"
+    ULTRAWIDE_H = "21:9 (Ultrawide)"
+    PHOTO_V = "2:3 (Portrait Photo)"
+    STANDARD_V = "3:4 (Portrait Standard)"
+    WIDESCREEN_V = "9:16 (Portrait Widescreen)"
+
+
+ASPECT_RATIOS: dict[str, tuple[int, int]] = {
+    "1:1 (Square)": (1, 1),
+    "3:2 (Photo)": (3, 2),
+    "4:3 (Standard)": (4, 3),
+    "16:9 (Widescreen)": (16, 9),
+    "21:9 (Ultrawide)": (21, 9),
+    "2:3 (Portrait Photo)": (2, 3),
+    "3:4 (Portrait Standard)": (3, 4),
+    "9:16 (Portrait Widescreen)": (9, 16),
+}
+
+
+class ResolutionSelector(io.ComfyNode):
+    """Calculate width and height from aspect ratio and megapixel target."""
+
+    @classmethod
+    def define_schema(cls):
+        return io.Schema(
+            node_id="ResolutionSelector",
+            display_name="Resolution Selector",
+            category="utils",
+            description="Calculate width and height from aspect ratio and megapixel target. Useful for setting up Empty Latent Image dimensions.",
+            inputs=[
+                io.Combo.Input(
+                    "aspect_ratio",
+                    options=AspectRatio,
+                    default=AspectRatio.SQUARE,
+                    tooltip="The aspect ratio for the output dimensions.",
+                ),
+                io.Float.Input(
+                    "megapixels",
+                    default=1.0,
+                    min=0.1,
+                    max=16.0,
+                    step=0.1,
+                    tooltip="Target total megapixels. 1.0 MP ≈ 1024×1024 for square.",
+                ),
+            ],
+            outputs=[
+                io.Int.Output("width", tooltip="Calculated width in pixels (multiple of 8)."),
+                io.Int.Output("height", tooltip="Calculated height in pixels (multiple of 8)."),
+            ],
+        )
+
+    @classmethod
+    def execute(cls, aspect_ratio: str, megapixels: float) -> io.NodeOutput:
+        w_ratio, h_ratio = ASPECT_RATIOS[aspect_ratio]
+        total_pixels = megapixels * 1024 * 1024
+        scale = math.sqrt(total_pixels / (w_ratio * h_ratio))
+        width = round(w_ratio * scale / 8) * 8
+        height = round(h_ratio * scale / 8) * 8
+        return io.NodeOutput(width, height)
+
+
+class ResolutionExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            ResolutionSelector,
+        ]
+
+
+async def comfy_entrypoint() -> ResolutionExtension:
+    return ResolutionExtension()
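A quick worked check of the rounding math in execute(), using the node's default 1.0 MP target at 16:9; the values below follow directly from the formulas in the diff above:

```python
import math

w_ratio, h_ratio = 16, 9
total_pixels = 1.0 * 1024 * 1024                       # 1,048,576 pixels
scale = math.sqrt(total_pixels / (w_ratio * h_ratio))  # sqrt(1048576 / 144) ≈ 85.33
width = round(w_ratio * scale / 8) * 8                 # round(170.67) * 8 = 1368
height = round(h_ratio * scale / 8) * 8                # round(96.0)  * 8 = 768
print(width, height)  # 1368 768 -> ~1.05 MP, both multiples of 8
```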

View File

@@ -2427,6 +2427,7 @@ async def init_builtin_extra_nodes():
         "nodes_audio_encoder.py",
         "nodes_rope.py",
         "nodes_logic.py",
+        "nodes_resolution.py",
         "nodes_nop.py",
         "nodes_kandinsky5.py",
         "nodes_wanmove.py",