Compare commits

...

5 Commits

Author SHA1 Message Date
huyua9
bd955f5085
Merge 7b7becc34c into 1265955b34 2026-05-04 19:41:16 -04:00
rattus
1265955b34
ops: handle multi-compute of the same weight (#13705)
If the same weight is used multiple times within the same prefetch
window, it should only apply compute state mutations once. Mark the
weight as fully resident on the first pass accordingly.
2026-05-04 16:40:57 -07:00
rattus
1ac78180b3
make control-net load order deterministic (#13701)
Some checks are pending
Python Linting / Run Ruff (push) Waiting to run
Python Linting / Run Pylint (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Waiting to run
Execution Tests / test (macos-latest) (push) Waiting to run
Execution Tests / test (ubuntu-latest) (push) Waiting to run
Execution Tests / test (windows-latest) (push) Waiting to run
Test server launches without errors / test (push) Waiting to run
Unit Tests / test (macos-latest) (push) Waiting to run
Unit Tests / test (ubuntu-latest) (push) Waiting to run
Unit Tests / test (windows-2022) (push) Waiting to run
Make this deterministic so speeds dont change base of load order. Load
them in reverse order so whatever the caller lists first is the top
priority.
2026-05-04 12:58:06 -07:00
rattus
c47633f3be
prefetch: guard against no offload (#13703)
cast_ will return no stream if there is no work to do. guard against
this is the consume logic.
2026-05-04 12:56:05 -07:00
Jukka Seppänen
c33d26c283
fix: Proper memory estimation for frame interpolation when not using dynamic VRAM (#13698) 2026-05-04 20:20:40 +03:00
7 changed files with 23 additions and 13 deletions

View File

@ -721,13 +721,15 @@ def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimu
else:
minimum_memory_required = max(inference_memory, minimum_memory_required + extra_reserved_memory())
models_temp = set()
# Order-preserving dedup. A plain set() would randomize iteration order across runs
models_temp = {}
for m in models:
models_temp.add(m)
models_temp[m] = None
for mm in m.model_patches_models():
models_temp.add(mm)
models_temp[mm] = None
models = models_temp
models = list(models_temp)
models.reverse()
models_to_load = []

View File

@ -37,7 +37,8 @@ def prefetch_queue_pop(queue, device, module):
consumed = queue.pop(0)
if consumed is not None:
offload_stream, prefetch_state = consumed
offload_stream.wait_stream(comfy.model_management.current_stream(device))
if offload_stream is not None:
offload_stream.wait_stream(comfy.model_management.current_stream(device))
_, comfy_modules = prefetch_state
if comfy_modules is not None:
cleanup_prefetched_modules(comfy_modules)

View File

@ -253,6 +253,9 @@ def resolve_cast_module_with_vbar(s, dtype, device, bias_dtype, compute_dtype, w
if bias is not None:
bias = post_cast(s, "bias", bias, bias_dtype, prefetch["resident"], update_weight)
if prefetch["signature"] is not None:
prefetch["resident"] = True
return weight, bias

View File

@ -89,7 +89,8 @@ def get_additional_models(conds, dtype):
gligen += get_models_from_cond(conds[k], "gligen")
add_models += get_models_from_cond(conds[k], "additional_models")
control_nets = set(cnets)
# Order-preserving dedup. A plain set() would randomize iteration order across runs
control_nets = list(dict.fromkeys(cnets))
inference_memory = 0
control_models = []

View File

@ -199,6 +199,9 @@ class FILMNet(nn.Module):
def get_dtype(self):
return self.extract.extract_sublevels.convs[0][0].conv.weight.dtype
def memory_used_forward(self, shape, dtype):
return 1700 * shape[1] * shape[2] * dtype.itemsize
def _build_warp_grids(self, H, W, device):
"""Pre-compute warp grids for all pyramid levels."""
if (H, W) in self._warp_grids:

View File

@ -74,6 +74,9 @@ class IFNet(nn.Module):
def get_dtype(self):
return self.encode.cnn0.weight.dtype
def memory_used_forward(self, shape, dtype):
return 300 * shape[1] * shape[2] * dtype.itemsize
def _build_warp_grids(self, H, W, device):
if (H, W) in self._warp_grids:
return

View File

@ -37,7 +37,7 @@ class FrameInterpolationModelLoader(io.ComfyNode):
model = cls._detect_and_load(sd)
dtype = torch.float16 if model_management.should_use_fp16(model_management.get_torch_device()) else torch.float32
model.eval().to(dtype)
patcher = comfy.model_patcher.ModelPatcher(
patcher = comfy.model_patcher.CoreModelPatcher(
model,
load_device=model_management.get_torch_device(),
offload_device=model_management.unet_offload_device(),
@ -98,16 +98,13 @@ class FrameInterpolate(io.ComfyNode):
if num_frames < 2 or multiplier < 2:
return io.NodeOutput(images)
model_management.load_model_gpu(interp_model)
device = interp_model.load_device
dtype = interp_model.model_dtype()
inference_model = interp_model.model
# Free VRAM for inference activations (model weights + ~20x a single frame's worth)
H, W = images.shape[1], images.shape[2]
activation_mem = H * W * 3 * images.element_size() * 20
model_management.free_memory(activation_mem, device)
activation_mem = inference_model.memory_used_forward(images.shape, dtype)
model_management.load_models_gpu([interp_model], memory_required=activation_mem)
align = getattr(inference_model, "pad_align", 1)
H, W = images.shape[1], images.shape[2]
# Prepare a single padded frame on device for determining output dimensions
def prepare_frame(idx):