Merge fc2f500571 into fcd9a236b0

Update template to 0.7.69 (#11719 )
Add warning for old pytorch. (#11718 )
2026-01-26 22:30:19 +08:00 · 2026-01-08 06:03:29 +03:00 · 2026-01-07 18:22:23 -08:00 · 2026-01-07 21:07:26 -05:00 · 2026-01-07 21:01:16 -05:00 · 2026-01-07 20:11:22 -05:00
9 changed files with 80 additions and 26 deletions
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@@ -718,6 +718,7 @@ class ModelPatcher:
                            continue

                cast_weight = self.force_cast_weights
+                m.comfy_force_cast_weights = self.force_cast_weights
                if lowvram_weight:
                    if hasattr(m, "comfy_cast_weights"):
                        m.weight_function = []
@@ -790,11 +791,12 @@ class ModelPatcher:
                for param in params:
                    self.pin_weight_to_device("{}.{}".format(n, param))

+            usable_stat = "{:.2f} MB usable,".format(lowvram_model_memory / (1024 * 1024)) if lowvram_model_memory < 1e32 else ""
            if lowvram_counter > 0:
-                logging.info("loaded partially; {:.2f} MB usable, {:.2f} MB loaded, {:.2f} MB offloaded, {:.2f} MB buffer reserved, lowvram patches: {}".format(lowvram_model_memory / (1024 * 1024), mem_counter / (1024 * 1024), lowvram_mem_counter / (1024 * 1024), offload_buffer / (1024 * 1024), patch_counter))
+                logging.info("loaded partially; {} {:.2f} MB loaded, {:.2f} MB offloaded, {:.2f} MB buffer reserved, lowvram patches: {}".format(usable_stat, mem_counter / (1024 * 1024), lowvram_mem_counter / (1024 * 1024), offload_buffer / (1024 * 1024), patch_counter))
                self.model.model_lowvram = True
            else:
-                logging.info("loaded completely; {:.2f} MB usable, {:.2f} MB loaded, full load: {}".format(lowvram_model_memory / (1024 * 1024), mem_counter / (1024 * 1024), full_load))
+                logging.info("loaded completely; {} {:.2f} MB loaded, full load: {}".format(usable_stat, mem_counter / (1024 * 1024), full_load))
                self.model.model_lowvram = False
                if full_load:
                    self.model.to(device_to)
--- a/comfy/ops.py
+++ b/comfy/ops.py
@@ -654,29 +654,29 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec
                run_every_op()

                input_shape = input.shape
-                tensor_3d = input.ndim == 3
-
-                if self._full_precision_mm or self.comfy_cast_weights or len(self.weight_function) > 0 or len(self.bias_function) > 0:
-                    return self.forward_comfy_cast_weights(input, *args, **kwargs)
+                reshaped_3d = False

                if (getattr(self, 'layout_type', None) is not None and
-                    not isinstance(input, QuantizedTensor)):
+                    not isinstance(input, QuantizedTensor) and not self._full_precision_mm and
+                    not getattr(self, 'comfy_force_cast_weights', False) and
+                    len(self.weight_function) == 0 and len(self.bias_function) == 0):

                    # Reshape 3D tensors to 2D for quantization (needed for NVFP4 and others)
-                    if tensor_3d:
-                        input = input.reshape(-1, input_shape[2])
+                    input_reshaped = input.reshape(-1, input_shape[2]) if input.ndim == 3 else input

-                    if input.ndim != 2:
-                        # Fall back to comfy_cast_weights for non-2D tensors
-                        return self.forward_comfy_cast_weights(input.reshape(input_shape), *args, **kwargs)
+                    # Fall back to non-quantized for non-2D tensors
+                    if input_reshaped.ndim == 2:
+                        reshaped_3d = input.ndim == 3
+                        # dtype is now implicit in the layout class
+                        scale = getattr(self, 'input_scale', None)
+                        if scale is not None:
+                            scale = comfy.model_management.cast_to_device(scale, input.device, None)
+                        input = QuantizedTensor.from_float(input_reshaped, self.layout_type, scale=scale)

-                    # dtype is now implicit in the layout class
-                    input = QuantizedTensor.from_float(input, self.layout_type, scale=getattr(self, 'input_scale', None))
-
-                output = self._forward(input, self.weight, self.bias)
+                output = self.forward_comfy_cast_weights(input)

                # Reshape output back to 3D if input was 3D
-                if tensor_3d:
+                if reshaped_3d:
                    output = output.reshape((input_shape[0], input_shape[1], self.weight.shape[0]))

                return output
--- a/comfy/quant_ops.py
+++ b/comfy/quant_ops.py
@@ -19,6 +19,7 @@ try:
        cuda_version = tuple(map(int, str(torch.version.cuda).split('.')))
        if cuda_version < (13,):
            ck.registry.disable("cuda")
+            logging.warning("WARNING: You need pytorch with cu130 or higher to use optimized CUDA operations.")

    ck.registry.disable("triton")
    for k, v in ck.list_backends().items():
--- a/comfy/sd.py
+++ b/comfy/sd.py
@@ -218,7 +218,7 @@ class CLIP:
            if unprojected:
                self.cond_stage_model.set_clip_options({"projected_pooled": False})

-            self.load_model()
+            self.load_model(tokens)
            self.cond_stage_model.set_clip_options({"execution_device": self.patcher.load_device})
            all_hooks.reset()
            self.patcher.patch_hooks(None)
@@ -266,7 +266,7 @@ class CLIP:
        if return_pooled == "unprojected":
            self.cond_stage_model.set_clip_options({"projected_pooled": False})

-        self.load_model()
+        self.load_model(tokens)
        self.cond_stage_model.set_clip_options({"execution_device": self.patcher.load_device})
        o = self.cond_stage_model.encode_token_weights(tokens)
        cond, pooled = o[:2]
@@ -299,8 +299,11 @@ class CLIP:
            sd_clip[k] = sd_tokenizer[k]
        return sd_clip

-    def load_model(self):
-        model_management.load_model_gpu(self.patcher)
+    def load_model(self, tokens={}):
+        memory_used = 0
+        if hasattr(self.cond_stage_model, "memory_estimation_function"):
+            memory_used = self.cond_stage_model.memory_estimation_function(tokens, device=self.patcher.load_device)
+        model_management.load_models_gpu([self.patcher], memory_required=memory_used)
        return self.patcher

    def get_key_patches(self):
--- a/comfy/supported_models.py
+++ b/comfy/supported_models.py
@@ -845,7 +845,7 @@ class LTXAV(LTXV):

    def __init__(self, unet_config):
        super().__init__(unet_config)
-        self.memory_usage_factor = 0.055  # TODO
+        self.memory_usage_factor = 0.061  # TODO

    def get_model(self, state_dict, prefix="", device=None):
        out = model_base.LTXAV(self, device=device)
--- a/comfy/text_encoders/lt.py
+++ b/comfy/text_encoders/lt.py
@@ -98,10 +98,13 @@ class LTXAVTEModel(torch.nn.Module):

        out, pooled, extra = self.gemma3_12b.encode_token_weights(token_weight_pairs)
        out_device = out.device
+        if comfy.model_management.should_use_bf16(self.execution_device):
+            out = out.to(device=self.execution_device, dtype=torch.bfloat16)
        out = out.movedim(1, -1).to(self.execution_device)
        out = 8.0 * (out - out.mean(dim=(1, 2), keepdim=True)) / (out.amax(dim=(1, 2), keepdim=True) - out.amin(dim=(1, 2), keepdim=True) + 1e-6)
        out = out.reshape((out.shape[0], out.shape[1], -1))
        out = self.text_embedding_projection(out)
+        out = out.float()
        out_vid = self.video_embeddings_connector(out)[0]
        out_audio = self.audio_embeddings_connector(out)[0]
        out = torch.concat((out_vid, out_audio), dim=-1)
@@ -118,6 +121,14 @@ class LTXAVTEModel(torch.nn.Module):

            return self.load_state_dict(sdo, strict=False)

+    def memory_estimation_function(self, token_weight_pairs, device=None):
+        constant = 6.0
+        if comfy.model_management.should_use_bf16(device):
+            constant /= 2.0
+
+        token_weight_pairs = token_weight_pairs.get("gemma3_12b", [])
+        num_tokens = sum(map(lambda a: len(a), token_weight_pairs))
+        return num_tokens * constant * 1024 * 1024

 def ltxav_te(dtype_llama=None, llama_quantization_metadata=None):
    class LTXAVTEModel_(LTXAVTEModel):
--- a/comfy/utils.py
+++ b/comfy/utils.py
@@ -30,6 +30,7 @@ from torch.nn.functional import interpolate
 from einops import rearrange
 from comfy.cli_args import args
 import json
+import time

 MMAP_TORCH_FILES = args.mmap_torch_files
 DISABLE_MMAP = args.disable_mmap
@@ -1097,6 +1098,10 @@ def set_progress_bar_global_hook(function):
    global PROGRESS_BAR_HOOK
    PROGRESS_BAR_HOOK = function

+# Throttle settings for progress bar updates to reduce WebSocket flooding
+PROGRESS_THROTTLE_MIN_INTERVAL = 0.1  # 100ms minimum between updates
+PROGRESS_THROTTLE_MIN_PERCENT = 0.5   # 0.5% minimum progress change
+
 class ProgressBar:
    def __init__(self, total, node_id=None):
        global PROGRESS_BAR_HOOK
@@ -1104,6 +1109,8 @@ class ProgressBar:
        self.current = 0
        self.hook = PROGRESS_BAR_HOOK
        self.node_id = node_id
+        self._last_update_time = 0.0
+        self._last_sent_value = -1

    def update_absolute(self, value, total=None, preview=None):
        if total is not None:
@@ -1112,7 +1119,29 @@ class ProgressBar:
            value = self.total
        self.current = value
        if self.hook is not None:
-            self.hook(self.current, self.total, preview, node_id=self.node_id)
+            current_time = time.perf_counter()
+            is_first = (self._last_sent_value < 0)
+            is_final = (value >= self.total)
+            has_preview = (preview is not None)
+
+            # Always send immediately for previews, first update, or final update
+            if has_preview or is_first or is_final:
+                self.hook(self.current, self.total, preview, node_id=self.node_id)
+                self._last_update_time = current_time
+                self._last_sent_value = value
+                return
+
+            # Apply throttling for regular progress updates
+            if self.total > 0:
+                percent_changed = ((value - max(0, self._last_sent_value)) / self.total) * 100
+            else:
+                percent_changed = 100
+            time_elapsed = current_time - self._last_update_time
+
+            if time_elapsed >= PROGRESS_THROTTLE_MIN_INTERVAL and percent_changed >= PROGRESS_THROTTLE_MIN_PERCENT:
+                self.hook(self.current, self.total, preview, node_id=self.node_id)
+                self._last_update_time = current_time
+                self._last_sent_value = value

    def update(self, value):
        self.update_absolute(self.current + value)
--- a/comfy_extras/nodes_lt_audio.py
+++ b/comfy_extras/nodes_lt_audio.py
@@ -185,6 +185,10 @@ class LTXAVTextEncoderLoader(io.ComfyNode):
                io.Combo.Input(
                    "ckpt_name",
                    options=folder_paths.get_filename_list("checkpoints"),
+                ),
+                io.Combo.Input(
+                    "device",
+                    options=["default", "cpu"],
                )
            ],
            outputs=[io.Clip.Output()],
@@ -197,7 +201,11 @@ class LTXAVTextEncoderLoader(io.ComfyNode):
        clip_path1 = folder_paths.get_full_path_or_raise("text_encoders", text_encoder)
        clip_path2 = folder_paths.get_full_path_or_raise("checkpoints", ckpt_name)

-        clip = comfy.sd.load_clip(ckpt_paths=[clip_path1, clip_path2], embedding_directory=folder_paths.get_folder_paths("embeddings"), clip_type=clip_type)
+        model_options = {}
+        if device == "cpu":
+            model_options["load_device"] = model_options["offload_device"] = torch.device("cpu")
+
+        clip = comfy.sd.load_clip(ckpt_paths=[clip_path1, clip_path2], embedding_directory=folder_paths.get_folder_paths("embeddings"), clip_type=clip_type, model_options=model_options)
        return io.NodeOutput(clip)


--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 comfyui-frontend-package==1.35.9
-comfyui-workflow-templates==0.7.67
+comfyui-workflow-templates==0.7.69
 comfyui-embedded-docs==0.3.1
 torch
 torchsde
@@ -21,7 +21,7 @@ psutil
 alembic
 SQLAlchemy
 av>=14.2.0
-comfy-kitchen>=0.2.3
+comfy-kitchen>=0.2.5

 #non essential dependencies:
 kornia>=0.7.1
Author	SHA1	Message	Date
Silver	9000dcad64	Merge `fc2f500571` into `fcd9a236b0`	2026-01-08 06:03:29 +03:00
ComfyUI Wiki	fcd9a236b0	Update template to 0.7.69 (#11719 )	2026-01-07 18:22:23 -08:00
comfyanonymous	21e8425087	Add warning for old pytorch. (#11718 )	2026-01-07 21:07:26 -05:00
rattus	b6c79a648a	ops: Fix offloading with FP8MM performance (#11697 ) This logic was checking comfy_cast_weights, and going straight to the forward_comfy_cast_weights implementation without attempting to downscale input to fp8 in the event comfy_cast_weights is set. The main reason comfy_cast_weights would be set would be for async offload, which is not a good reason to nix FP8MM. So instead, AND together the underlying exclusions for FP8MM, which are: * having a weight_function (usually LowVramPatch) * force_cast_weights (compute dtype override) * the weight is not Quantized * the input is already quantized * the model or layer has MM explicitly disabled. If you get past all of those exclusions, quantize the input tensor. Then hand the new input, quantized or not, off to forward_comfy_cast_weights to handle it. If the weight is offloaded but input is quantized you will get an offloaded MM8.	2026-01-07 21:01:16 -05:00
comfyanonymous	25bc1b5b57	Add memory estimation function to ltxav text encoder. (#11716 )	2026-01-07 20:11:22 -05:00
comfyanonymous	3cd19e99c1	Increase ltxav mem estimation by a bit. (#11715 )	2026-01-07 20:04:56 -05:00
comfyanonymous	007b87e7ac	Bump required comfy-kitchen version. (#11714 )	2026-01-07 19:48:47 -05:00
comfyanonymous	34751fe9f9	Lower ltxv text encoder vram use. (#11713 )	2026-01-07 19:12:15 -05:00
Jukka Seppänen	1c705f7bfb	Add device selection for LTXAVTextEncoderLoader (#11700 )	2026-01-07 18:39:59 -05:00
rattus	48e5ea1dfd	model_patcher: Remove confusing load stat (#11710 ) If the loader passes 1e32 as the usable memory size, it means a full load is being forced. This happens with CPU loads and a few other misc cases. Remove the confusing number and just leave the other details.	2026-01-07 18:39:20 -05:00
Silver	fc2f500571	Merge branch 'comfyanonymous:master' into pbar-throttle-priority	2026-01-05 04:21:30 +01:00
Silver	2c98bcad80	Merge branch 'comfyanonymous:master' into pbar-throttle-priority	2026-01-04 10:01:30 +01:00
silveroxides	59cce40fb3	feat: throttle ProgressBar updates to reduce WebSocket flooding	2025-12-25 06:38:26 +01:00