From 2a4328d639810858aa625c7bfedb974a13a57abe Mon Sep 17 00:00:00 2001 From: rattus <46076784+rattus128@users.noreply.github.com> Date: Wed, 11 Feb 2026 11:53:42 -0800 Subject: [PATCH] ace15: Use dynamic_vram friendly trange (#12409) Factor out the ksampler trange and use it in ACE LLM to prevent the silent stall at 0 and rate distortion due to first-step model load. --- comfy/k_diffusion/sampling.py | 32 ++------------------------------ comfy/text_encoders/ace15.py | 3 +-- comfy/utils.py | 27 +++++++++++++++++++++++++++ 3 files changed, 30 insertions(+), 32 deletions(-) diff --git a/comfy/k_diffusion/sampling.py b/comfy/k_diffusion/sampling.py index c0c51d51a..6978eb717 100644 --- a/comfy/k_diffusion/sampling.py +++ b/comfy/k_diffusion/sampling.py @@ -1,12 +1,11 @@ import math -import time from functools import partial from scipy import integrate import torch from torch import nn import torchsde -from tqdm.auto import trange as trange_, tqdm +from tqdm.auto import tqdm from . import utils from . import deis @@ -15,34 +14,7 @@ import comfy.model_patcher import comfy.model_sampling import comfy.memory_management - - -def trange(*args, **kwargs): - if comfy.memory_management.aimdo_allocator is None: - return trange_(*args, **kwargs) - - pbar = trange_(*args, **kwargs, smoothing=1.0) - pbar._i = 0 - pbar.set_postfix_str(" Model Initializing ... ") - - _update = pbar.update - - def warmup_update(n=1): - pbar._i += 1 - if pbar._i == 1: - pbar.i1_time = time.time() - pbar.set_postfix_str(" Model Initialization complete! ") - elif pbar._i == 2: - #bring forward the effective start time based the the diff between first and second iteration - #to attempt to remove load overhead from the final step rate estimate. 
- pbar.start_t = pbar.i1_time - (time.time() - pbar.i1_time) - pbar.set_postfix_str("") - - _update(n) - - pbar.update = warmup_update - return pbar - +from comfy.utils import model_trange as trange def append_zero(x): return torch.cat([x, x.new_zeros([1])]) diff --git a/comfy/text_encoders/ace15.py b/comfy/text_encoders/ace15.py index 73697b3c1..b8198a820 100644 --- a/comfy/text_encoders/ace15.py +++ b/comfy/text_encoders/ace15.py @@ -3,7 +3,6 @@ import comfy.text_encoders.llama from comfy import sd1_clip import torch import math -from tqdm.auto import trange import yaml import comfy.utils @@ -52,7 +51,7 @@ def sample_manual_loop_no_classes( progress_bar = comfy.utils.ProgressBar(max_new_tokens) - for step in trange(max_new_tokens, desc="LM sampling"): + for step in comfy.utils.model_trange(max_new_tokens, desc="LM sampling"): outputs = model.transformer(None, attention_mask, embeds=embeds.to(execution_dtype), num_tokens=num_tokens, intermediate_output=None, dtype=execution_dtype, embeds_info=embeds_info, past_key_values=past_key_values) next_token_logits = model.transformer.logits(outputs[0])[:, -1] past_key_values = outputs[2] diff --git a/comfy/utils.py b/comfy/utils.py index edd80cebe..e0a94e2e1 100644 --- a/comfy/utils.py +++ b/comfy/utils.py @@ -27,6 +27,7 @@ from PIL import Image import logging import itertools from torch.nn.functional import interpolate +from tqdm.auto import trange from einops import rearrange from comfy.cli_args import args, enables_dynamic_vram import json @@ -1155,6 +1156,32 @@ def tiled_scale_multidim(samples, function, tile=(64, 64), overlap=8, upscale_am def tiled_scale(samples, function, tile_x=64, tile_y=64, overlap = 8, upscale_amount = 4, out_channels = 3, output_device="cpu", pbar = None): return tiled_scale_multidim(samples, function, (tile_y, tile_x), overlap=overlap, upscale_amount=upscale_amount, out_channels=out_channels, output_device=output_device, pbar=pbar) +def model_trange(*args, **kwargs): + if 
comfy.memory_management.aimdo_allocator is None: + return trange(*args, **kwargs) + + pbar = trange(*args, **kwargs, smoothing=1.0) + pbar._i = 0 + pbar.set_postfix_str(" Model Initializing ... ") + + _update = pbar.update + + def warmup_update(n=1): + pbar._i += 1 + if pbar._i == 1: + pbar.i1_time = time.time() + pbar.set_postfix_str(" Model Initialization complete! ") + elif pbar._i == 2: + #bring forward the effective start time based on the diff between first and second iteration + #to attempt to remove load overhead from the final step rate estimate. + pbar.start_t = pbar.i1_time - (time.time() - pbar.i1_time) + pbar.set_postfix_str("") + + _update(n) + + pbar.update = warmup_update + return pbar + PROGRESS_BAR_ENABLED = True def set_progress_bar_enabled(enabled): global PROGRESS_BAR_ENABLED