Merge f39c9caa55 into 2e9d51680a

ComfyUI version v0.8.2
Tweak ltxv vae mem estimation. (#11722 )
2026-01-28 23:30:16 +08:00 · 2026-01-08 13:57:45 +09:00 · 2026-01-07 23:50:02 -05:00 · 2026-01-07 23:07:05 -05:00 · 2026-01-07 22:10:08 -05:00 · 2026-01-07 18:22:23 -08:00
13 changed files with 121 additions and 29 deletions
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@ -718,6 +718,7 @@ class ModelPatcher:
                            continue

                cast_weight = self.force_cast_weights
+                m.comfy_force_cast_weights = self.force_cast_weights
                if lowvram_weight:
                    if hasattr(m, "comfy_cast_weights"):
                        m.weight_function = []
@ -790,11 +791,12 @@ class ModelPatcher:
                for param in params:
                    self.pin_weight_to_device("{}.{}".format(n, param))

+            usable_stat = "{:.2f} MB usable,".format(lowvram_model_memory / (1024 * 1024)) if lowvram_model_memory < 1e32 else ""
            if lowvram_counter > 0:
-                logging.info("loaded partially; {:.2f} MB usable, {:.2f} MB loaded, {:.2f} MB offloaded, {:.2f} MB buffer reserved, lowvram patches: {}".format(lowvram_model_memory / (1024 * 1024), mem_counter / (1024 * 1024), lowvram_mem_counter / (1024 * 1024), offload_buffer / (1024 * 1024), patch_counter))
+                logging.info("loaded partially; {} {:.2f} MB loaded, {:.2f} MB offloaded, {:.2f} MB buffer reserved, lowvram patches: {}".format(usable_stat, mem_counter / (1024 * 1024), lowvram_mem_counter / (1024 * 1024), offload_buffer / (1024 * 1024), patch_counter))
                self.model.model_lowvram = True
            else:
-                logging.info("loaded completely; {:.2f} MB usable, {:.2f} MB loaded, full load: {}".format(lowvram_model_memory / (1024 * 1024), mem_counter / (1024 * 1024), full_load))
+                logging.info("loaded completely; {} {:.2f} MB loaded, full load: {}".format(usable_stat, mem_counter / (1024 * 1024), full_load))
                self.model.model_lowvram = False
                if full_load:
                    self.model.to(device_to)
--- a/comfy/ops.py
+++ b/comfy/ops.py
@ -654,29 +654,29 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec
                run_every_op()

                input_shape = input.shape
-                tensor_3d = input.ndim == 3
-
-                if self._full_precision_mm or self.comfy_cast_weights or len(self.weight_function) > 0 or len(self.bias_function) > 0:
-                    return self.forward_comfy_cast_weights(input, *args, **kwargs)
+                reshaped_3d = False

                if (getattr(self, 'layout_type', None) is not None and
-                    not isinstance(input, QuantizedTensor)):
+                    not isinstance(input, QuantizedTensor) and not self._full_precision_mm and
+                    not getattr(self, 'comfy_force_cast_weights', False) and
+                    len(self.weight_function) == 0 and len(self.bias_function) == 0):

                    # Reshape 3D tensors to 2D for quantization (needed for NVFP4 and others)
-                    if tensor_3d:
-                        input = input.reshape(-1, input_shape[2])
+                    input_reshaped = input.reshape(-1, input_shape[2]) if input.ndim == 3 else input

-                    if input.ndim != 2:
-                        # Fall back to comfy_cast_weights for non-2D tensors
-                        return self.forward_comfy_cast_weights(input.reshape(input_shape), *args, **kwargs)
+                    # Fall back to non-quantized for non-2D tensors
+                    if input_reshaped.ndim == 2:
+                        reshaped_3d = input.ndim == 3
+                        # dtype is now implicit in the layout class
+                        scale = getattr(self, 'input_scale', None)
+                        if scale is not None:
+                            scale = comfy.model_management.cast_to_device(scale, input.device, None)
+                        input = QuantizedTensor.from_float(input_reshaped, self.layout_type, scale=scale)

-                    # dtype is now implicit in the layout class
-                    input = QuantizedTensor.from_float(input, self.layout_type, scale=getattr(self, 'input_scale', None))
-
-                output = self._forward(input, self.weight, self.bias)
+                output = self.forward_comfy_cast_weights(input)

                # Reshape output back to 3D if input was 3D
-                if tensor_3d:
+                if reshaped_3d:
                    output = output.reshape((input_shape[0], input_shape[1], self.weight.shape[0]))

                return output
--- a/comfy/quant_ops.py
+++ b/comfy/quant_ops.py
@ -19,6 +19,7 @@ try:
        cuda_version = tuple(map(int, str(torch.version.cuda).split('.')))
        if cuda_version < (13,):
            ck.registry.disable("cuda")
+            logging.warning("WARNING: You need pytorch with cu130 or higher to use optimized CUDA operations.")

    ck.registry.disable("triton")
    for k, v in ck.list_backends().items():
--- a/comfy/sd.py
+++ b/comfy/sd.py
@ -218,7 +218,7 @@ class CLIP:
            if unprojected:
                self.cond_stage_model.set_clip_options({"projected_pooled": False})

-            self.load_model()
+            self.load_model(tokens)
            self.cond_stage_model.set_clip_options({"execution_device": self.patcher.load_device})
            all_hooks.reset()
            self.patcher.patch_hooks(None)
@ -266,7 +266,7 @@ class CLIP:
        if return_pooled == "unprojected":
            self.cond_stage_model.set_clip_options({"projected_pooled": False})

-        self.load_model()
+        self.load_model(tokens)
        self.cond_stage_model.set_clip_options({"execution_device": self.patcher.load_device})
        o = self.cond_stage_model.encode_token_weights(tokens)
        cond, pooled = o[:2]
@ -299,8 +299,11 @@ class CLIP:
            sd_clip[k] = sd_tokenizer[k]
        return sd_clip

-    def load_model(self):
-        model_management.load_model_gpu(self.patcher)
+    def load_model(self, tokens={}):
+        memory_used = 0
+        if hasattr(self.cond_stage_model, "memory_estimation_function"):
+            memory_used = self.cond_stage_model.memory_estimation_function(tokens, device=self.patcher.load_device)
+        model_management.load_models_gpu([self.patcher], memory_required=memory_used)
        return self.patcher

    def get_key_patches(self):
@ -476,8 +479,8 @@ class VAE:
                self.first_stage_model = comfy.ldm.lightricks.vae.causal_video_autoencoder.VideoVAE(version=version, config=vae_config)
                self.latent_channels = 128
                self.latent_dim = 3
-                self.memory_used_decode = lambda shape, dtype: (900 * shape[2] * shape[3] * shape[4] * (8 * 8 * 8)) * model_management.dtype_size(dtype)
-                self.memory_used_encode = lambda shape, dtype: (70 * max(shape[2], 7) * shape[3] * shape[4]) * model_management.dtype_size(dtype)
+                self.memory_used_decode = lambda shape, dtype: (1200 * shape[2] * shape[3] * shape[4] * (8 * 8 * 8)) * model_management.dtype_size(dtype)
+                self.memory_used_encode = lambda shape, dtype: (80 * max(shape[2], 7) * shape[3] * shape[4]) * model_management.dtype_size(dtype)
                self.upscale_ratio = (lambda a: max(0, a * 8 - 7), 32, 32)
                self.upscale_index_formula = (8, 32, 32)
                self.downscale_ratio = (lambda a: max(0, math.floor((a + 7) / 8)), 32, 32)
--- a/comfy/supported_models.py
+++ b/comfy/supported_models.py
@ -845,7 +845,7 @@ class LTXAV(LTXV):

    def __init__(self, unet_config):
        super().__init__(unet_config)
-        self.memory_usage_factor = 0.055  # TODO
+        self.memory_usage_factor = 0.061  # TODO

    def get_model(self, state_dict, prefix="", device=None):
        out = model_base.LTXAV(self, device=device)
--- a/comfy/text_encoders/lt.py
+++ b/comfy/text_encoders/lt.py
@ -98,10 +98,13 @@ class LTXAVTEModel(torch.nn.Module):

        out, pooled, extra = self.gemma3_12b.encode_token_weights(token_weight_pairs)
        out_device = out.device
+        if comfy.model_management.should_use_bf16(self.execution_device):
+            out = out.to(device=self.execution_device, dtype=torch.bfloat16)
        out = out.movedim(1, -1).to(self.execution_device)
        out = 8.0 * (out - out.mean(dim=(1, 2), keepdim=True)) / (out.amax(dim=(1, 2), keepdim=True) - out.amin(dim=(1, 2), keepdim=True) + 1e-6)
        out = out.reshape((out.shape[0], out.shape[1], -1))
        out = self.text_embedding_projection(out)
+        out = out.float()
        out_vid = self.video_embeddings_connector(out)[0]
        out_audio = self.audio_embeddings_connector(out)[0]
        out = torch.concat((out_vid, out_audio), dim=-1)
@ -118,6 +121,14 @@ class LTXAVTEModel(torch.nn.Module):

            return self.load_state_dict(sdo, strict=False)

+    def memory_estimation_function(self, token_weight_pairs, device=None):
+        constant = 6.0
+        if comfy.model_management.should_use_bf16(device):
+            constant /= 2.0
+
+        token_weight_pairs = token_weight_pairs.get("gemma3_12b", [])
+        num_tokens = sum(map(lambda a: len(a), token_weight_pairs))
+        return num_tokens * constant * 1024 * 1024

 def ltxav_te(dtype_llama=None, llama_quantization_metadata=None):
    class LTXAVTEModel_(LTXAVTEModel):
--- a/comfy_api/latest/_io.py
+++ b/comfy_api/latest/_io.py
@ -1113,6 +1113,18 @@ class DynamicSlot(ComfyTypeI):
            out_dict[input_type][finalized_id] = value
            out_dict["dynamic_paths"][finalized_id] = finalize_prefix(curr_prefix, curr_prefix[-1])

+@comfytype(io_type="IMAGECOMPARE")
+class ImageCompare(ComfyTypeI):
+  Type = dict
+
+  class Input(WidgetInput):
+      def __init__(self, id: str, display_name: str=None, optional=False, tooltip: str=None,
+                   socketless: bool=True):
+          super().__init__(id, display_name, optional, tooltip, None, None, socketless)
+
+      def as_dict(self):
+          return super().as_dict()
+
 DYNAMIC_INPUT_LOOKUP: dict[str, Callable[[dict[str, Any], dict[str, Any], tuple[str, dict[str, Any]], str, list[str] | None], None]] = {}
 def register_dynamic_input_func(io_type: str, func: Callable[[dict[str, Any], dict[str, Any], tuple[str, dict[str, Any]], str, list[str] | None], None]):
    DYNAMIC_INPUT_LOOKUP[io_type] = func
@ -1958,4 +1970,5 @@ __all__ = [
    "add_to_dict_v1",
    "add_to_dict_v3",
    "V3Data",
+    "ImageCompare",
 ]
--- a/comfy_extras/nodes_image_compare.py
+++ b/comfy_extras/nodes_image_compare.py
@ -0,0 +1,53 @@
+import nodes
+
+from typing_extensions import override
+from comfy_api.latest import IO, ComfyExtension
+
+
+class ImageCompare(IO.ComfyNode):
+    """Compares two images with a slider interface."""
+
+    @classmethod
+    def define_schema(cls):
+        return IO.Schema(
+            node_id="ImageCompare",
+            display_name="Image Compare",
+            description="Compares two images side by side with a slider.",
+            category="image",
+            is_experimental=True,
+            is_output_node=True,
+            inputs=[
+                IO.Image.Input("image_a", optional=True),
+                IO.Image.Input("image_b", optional=True),
+                IO.ImageCompare.Input("compare_view"),
+            ],
+            outputs=[],
+        )
+
+    @classmethod
+    def execute(cls, image_a=None, image_b=None, compare_view=None) -> IO.NodeOutput:
+        result = {"a_images": [], "b_images": []}
+
+        preview_node = nodes.PreviewImage()
+
+        if image_a is not None and len(image_a) > 0:
+            saved = preview_node.save_images(image_a, "comfy.compare.a")
+            result["a_images"] = saved["ui"]["images"]
+
+        if image_b is not None and len(image_b) > 0:
+            saved = preview_node.save_images(image_b, "comfy.compare.b")
+            result["b_images"] = saved["ui"]["images"]
+
+        return IO.NodeOutput(ui=result)
+
+
+class ImageCompareExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[IO.ComfyNode]]:
+        return [
+            ImageCompare,
+        ]
+
+
+async def comfy_entrypoint() -> ImageCompareExtension:
+    return ImageCompareExtension()
--- a/comfy_extras/nodes_lt_audio.py
+++ b/comfy_extras/nodes_lt_audio.py
@ -185,6 +185,10 @@ class LTXAVTextEncoderLoader(io.ComfyNode):
                io.Combo.Input(
                    "ckpt_name",
                    options=folder_paths.get_filename_list("checkpoints"),
+                ),
+                io.Combo.Input(
+                    "device",
+                    options=["default", "cpu"],
                )
            ],
            outputs=[io.Clip.Output()],
@ -197,7 +201,11 @@ class LTXAVTextEncoderLoader(io.ComfyNode):
        clip_path1 = folder_paths.get_full_path_or_raise("text_encoders", text_encoder)
        clip_path2 = folder_paths.get_full_path_or_raise("checkpoints", ckpt_name)

-        clip = comfy.sd.load_clip(ckpt_paths=[clip_path1, clip_path2], embedding_directory=folder_paths.get_folder_paths("embeddings"), clip_type=clip_type)
+        model_options = {}
+        if device == "cpu":
+            model_options["load_device"] = model_options["offload_device"] = torch.device("cpu")
+
+        clip = comfy.sd.load_clip(ckpt_paths=[clip_path1, clip_path2], embedding_directory=folder_paths.get_folder_paths("embeddings"), clip_type=clip_type, model_options=model_options)
        return io.NodeOutput(clip)


--- a/comfyui_version.py
+++ b/comfyui_version.py
@ -1,3 +1,3 @@
 # This file is automatically generated by the build process when version is
 # updated in pyproject.toml.
-__version__ = "0.8.0"
+__version__ = "0.8.2"
--- a/nodes.py
+++ b/nodes.py
@ -2369,6 +2369,7 @@ async def init_builtin_extra_nodes():
        "nodes_nop.py",
        "nodes_kandinsky5.py",
        "nodes_wanmove.py",
+        "nodes_image_compare.py",
    ]

    import_failed = []
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,6 +1,6 @@
 [project]
 name = "ComfyUI"
-version = "0.8.0"
+version = "0.8.2"
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.10"
--- a/requirements.txt
+++ b/requirements.txt
@ -1,5 +1,5 @@
 comfyui-frontend-package==1.35.9
-comfyui-workflow-templates==0.7.67
+comfyui-workflow-templates==0.7.69
 comfyui-embedded-docs==0.3.1
 torch
 torchsde
@ -21,7 +21,7 @@ psutil
 alembic
 SQLAlchemy
 av>=14.2.0
-comfy-kitchen>=0.2.3
+comfy-kitchen>=0.2.5

 #non essential dependencies:
 kornia>=0.7.1
Author	SHA1	Message	Date
Terry Jia	f5c888503a	Merge `f39c9caa55` into `2e9d51680a`	2026-01-08 13:57:45 +09:00
comfyanonymous	2e9d51680a	ComfyUI version v0.8.2 Some checks are pending Python Linting / Run Ruff (push) Waiting to run Details Python Linting / Run Pylint (push) Waiting to run Details Build package / Build Test (3.10) (push) Waiting to run Details Build package / Build Test (3.11) (push) Waiting to run Details Build package / Build Test (3.12) (push) Waiting to run Details Build package / Build Test (3.13) (push) Waiting to run Details Build package / Build Test (3.14) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Waiting to run Details Execution Tests / test (macos-latest) (push) Waiting to run Details Execution Tests / test (ubuntu-latest) (push) Waiting to run Details Execution Tests / test (windows-latest) (push) Waiting to run Details Test server launches without errors / test (push) Waiting to run Details Unit Tests / test (macos-latest) (push) Waiting to run Details Unit Tests / test (ubuntu-latest) (push) Waiting to run Details Unit Tests / test (windows-2022) (push) Waiting to run Details	2026-01-07 23:50:02 -05:00
comfyanonymous	50d6e1caf4	Tweak ltxv vae mem estimation. (#11722 )	2026-01-07 23:07:05 -05:00
comfyanonymous	ac12f77bed	ComfyUI version v0.8.1	2026-01-07 22:10:08 -05:00
ComfyUI Wiki	fcd9a236b0	Update template to 0.7.69 (#11719 )	2026-01-07 18:22:23 -08:00
comfyanonymous	21e8425087	Add warning for old pytorch. (#11718 )	2026-01-07 21:07:26 -05:00
rattus	b6c79a648a	ops: Fix offloading with FP8MM performance (#11697 ) This logic was checking comfy_cast_weights, and going straight to to the forward_comfy_cast_weights implementation without attempting to downscale input to fp8 in the event comfy_cast_weights is set. The main reason comfy_cast_weights would be set would be for async offload, which is not a good reason to nix FP8MM. So instead, and together the underlying exclusions for FP8MM which are: * having a weight_function (usually LowVramPatch) * force_cast_weights (compute dtype override) * the weight is not Quantized * the input is already quantized * the model or layer has MM explictily disabled. If you get past all of those exclusions, quantize the input tensor. Then hand the new input, quantized or not off to forward_comfy_cast_weights to handle it. If the weight is offloaded but input is quantized you will get an offloaded MM8.	2026-01-07 21:01:16 -05:00
comfyanonymous	25bc1b5b57	Add memory estimation function to ltxav text encoder. (#11716 )	2026-01-07 20:11:22 -05:00
comfyanonymous	3cd19e99c1	Increase ltxav mem estimation by a bit. (#11715 )	2026-01-07 20:04:56 -05:00
comfyanonymous	007b87e7ac	Bump required comfy-kitchen version. (#11714 )	2026-01-07 19:48:47 -05:00
comfyanonymous	34751fe9f9	Lower ltxv text encoder vram use. (#11713 )	2026-01-07 19:12:15 -05:00
Jukka Seppänen	1c705f7bfb	Add device selection for LTXAVTextEncoderLoader (#11700 )	2026-01-07 18:39:59 -05:00
rattus	48e5ea1dfd	model_patcher: Remove confusing load stat (#11710 ) If the loader passes 1e32 as the usable memory size, it means force the full load. This happens with CPU loads and a few other misc cases. Removing the confusing number and just leave the other details.	2026-01-07 18:39:20 -05:00
Terry Jia	f39c9caa55	add node - image compare	2025-12-31 20:00:12 -05:00