Merge dac0710c88 into 5943fbf457

bump comfyui_manager version to the 4.0.5 (#11732 )
Revert "Force sequential execution in CI test jobs (#11687 )" (#11725 )
2026-01-16 09:10:50 +08:00 · 2026-01-09 00:34:45 +03:00 · 2026-01-08 08:15:42 -08:00 · 2026-01-07 21:41:57 -08:00 · 2026-01-07 23:50:02 -05:00 · 2026-01-07 23:07:05 -05:00
21 changed files with 284 additions and 58 deletions
--- a/.ci/windows_nvidia_base_files/advanced/run_nvidia_gpu_disable_api_nodes.bat
+++ b/.ci/windows_nvidia_base_files/advanced/run_nvidia_gpu_disable_api_nodes.bat
@ -1,3 +1,3 @@
 ..\python_embeded\python.exe -s ..\ComfyUI\main.py --windows-standalone-build --disable-api-nodes
-echo If you see this and ComfyUI did not start try updating your Nvidia Drivers to the latest.
+echo If you see this and ComfyUI did not start try updating your Nvidia Drivers to the latest. If you get a c10.dll error you need to install vc redist that you can find: https://aka.ms/vc14/vc_redist.x64.exe
 pause
--- a/.ci/windows_nvidia_base_files/run_nvidia_gpu.bat
+++ b/.ci/windows_nvidia_base_files/run_nvidia_gpu.bat
@ -1,3 +1,3 @@
 .\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build
-echo If you see this and ComfyUI did not start try updating your Nvidia Drivers to the latest.
+echo If you see this and ComfyUI did not start try updating your Nvidia Drivers to the latest. If you get a c10.dll error you need to install vc redist that you can find: https://aka.ms/vc14/vc_redist.x64.exe
 pause
--- a/.ci/windows_nvidia_base_files/run_nvidia_gpu_fast_fp16_accumulation.bat
+++ b/.ci/windows_nvidia_base_files/run_nvidia_gpu_fast_fp16_accumulation.bat
@ -1,3 +1,3 @@
 .\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build --fast fp16_accumulation
-echo If you see this and ComfyUI did not start try updating your Nvidia Drivers to the latest.
+echo If you see this and ComfyUI did not start try updating your Nvidia Drivers to the latest. If you get a c10.dll error you need to install vc redist that you can find: https://aka.ms/vc14/vc_redist.x64.exe
 pause
--- a/.github/workflows/stable-release.yml
+++ b/.github/workflows/stable-release.yml
@ -117,7 +117,7 @@ jobs:
          ./python.exe get-pip.py
          ./python.exe -s -m pip install ../${{ inputs.cache_tag }}_python_deps/*

-          grep comfyui ../ComfyUI/requirements.txt > ./requirements_comfyui.txt
+          grep comfy ../ComfyUI/requirements.txt > ./requirements_comfyui.txt
          ./python.exe -s -m pip install -r requirements_comfyui.txt
          rm requirements_comfyui.txt

--- a/comfy/latent_formats.py
+++ b/comfy/latent_formats.py
@ -408,7 +408,9 @@ class LTXV(LatentFormat):
        self.latent_rgb_factors_bias = [-0.0571, -0.1657, -0.2512]

 class LTXAV(LTXV):
-    pass
+    def __init__(self):
+        self.latent_rgb_factors = None
+        self.latent_rgb_factors_bias = None

 class HunyuanVideo(LatentFormat):
    latent_channels = 16
--- a/comfy/ldm/flux/math.py
+++ b/comfy/ldm/flux/math.py
@ -4,6 +4,7 @@ from torch import Tensor

 from comfy.ldm.modules.attention import optimized_attention
 import comfy.model_management
+import logging


 def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor, mask=None, transformer_options={}) -> Tensor:
@ -13,7 +14,6 @@ def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor, mask=None, transforme
    x = optimized_attention(q, k, v, heads, skip_reshape=True, mask=mask, transformer_options=transformer_options)
    return x

-
 def rope(pos: Tensor, dim: int, theta: int) -> Tensor:
    assert dim % 2 == 0
    if comfy.model_management.is_device_mps(pos.device) or comfy.model_management.is_intel_xpu() or comfy.model_management.is_directml_enabled():
@ -28,13 +28,20 @@ def rope(pos: Tensor, dim: int, theta: int) -> Tensor:
    out = rearrange(out, "b n d (i j) -> b n d i j", i=2, j=2)
    return out.to(dtype=torch.float32, device=pos.device)

-def apply_rope1(x: Tensor, freqs_cis: Tensor):
-    x_ = x.to(dtype=freqs_cis.dtype).reshape(*x.shape[:-1], -1, 1, 2)

-    x_out = freqs_cis[..., 0] * x_[..., 0]
-    x_out.addcmul_(freqs_cis[..., 1], x_[..., 1])
+try:
+    import comfy.quant_ops
+    apply_rope = comfy.quant_ops.ck.apply_rope
+    apply_rope1 = comfy.quant_ops.ck.apply_rope1
+except:
+    logging.warning("No comfy kitchen, using old apply_rope functions.")
+    def apply_rope1(x: Tensor, freqs_cis: Tensor):
+        x_ = x.to(dtype=freqs_cis.dtype).reshape(*x.shape[:-1], -1, 1, 2)

-    return x_out.reshape(*x.shape).type_as(x)
+        x_out = freqs_cis[..., 0] * x_[..., 0]
+        x_out.addcmul_(freqs_cis[..., 1], x_[..., 1])

-def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor):
-    return apply_rope1(xq, freqs_cis), apply_rope1(xk, freqs_cis)
+        return x_out.reshape(*x.shape).type_as(x)
+
+    def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor):
+        return apply_rope1(xq, freqs_cis), apply_rope1(xk, freqs_cis)
--- a/comfy/ldm/lightricks/embeddings_connector.py
+++ b/comfy/ldm/lightricks/embeddings_connector.py
@ -276,7 +276,7 @@ class Embeddings1DConnector(nn.Module):
                max(1024, hidden_states.shape[1]) / self.num_learnable_registers
            )
            learnable_registers = torch.tile(
-                self.learnable_registers, (num_registers_duplications, 1)
+                self.learnable_registers.to(hidden_states), (num_registers_duplications, 1)
            )

            hidden_states = torch.cat((hidden_states, learnable_registers[hidden_states.shape[1]:].unsqueeze(0).repeat(hidden_states.shape[0], 1, 1)), dim=1)
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@ -1504,6 +1504,16 @@ def supports_fp8_compute(device=None):

    return True

+def supports_nvfp4_compute(device=None):
+    if not is_nvidia():
+        return False
+
+    props = torch.cuda.get_device_properties(device)
+    if props.major < 10:
+        return False
+
+    return True
+
 def extended_fp16_support():
    # TODO: check why some models work with fp16 on newer torch versions but not on older
    if torch_version_numeric < (2, 7):
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@ -718,6 +718,7 @@ class ModelPatcher:
                            continue

                cast_weight = self.force_cast_weights
+                m.comfy_force_cast_weights = self.force_cast_weights
                if lowvram_weight:
                    if hasattr(m, "comfy_cast_weights"):
                        m.weight_function = []
@ -790,11 +791,12 @@ class ModelPatcher:
                for param in params:
                    self.pin_weight_to_device("{}.{}".format(n, param))

+            usable_stat = "{:.2f} MB usable,".format(lowvram_model_memory / (1024 * 1024)) if lowvram_model_memory < 1e32 else ""
            if lowvram_counter > 0:
-                logging.info("loaded partially; {:.2f} MB usable, {:.2f} MB loaded, {:.2f} MB offloaded, {:.2f} MB buffer reserved, lowvram patches: {}".format(lowvram_model_memory / (1024 * 1024), mem_counter / (1024 * 1024), lowvram_mem_counter / (1024 * 1024), offload_buffer / (1024 * 1024), patch_counter))
+                logging.info("loaded partially; {} {:.2f} MB loaded, {:.2f} MB offloaded, {:.2f} MB buffer reserved, lowvram patches: {}".format(usable_stat, mem_counter / (1024 * 1024), lowvram_mem_counter / (1024 * 1024), offload_buffer / (1024 * 1024), patch_counter))
                self.model.model_lowvram = True
            else:
-                logging.info("loaded completely; {:.2f} MB usable, {:.2f} MB loaded, full load: {}".format(lowvram_model_memory / (1024 * 1024), mem_counter / (1024 * 1024), full_load))
+                logging.info("loaded completely; {} {:.2f} MB loaded, full load: {}".format(usable_stat, mem_counter / (1024 * 1024), full_load))
                self.model.model_lowvram = False
                if full_load:
                    self.model.to(device_to)
--- a/comfy/ops.py
+++ b/comfy/ops.py
@ -427,12 +427,12 @@ def fp8_linear(self, input):
    input = torch.clamp(input, min=-448, max=448, out=input)
    input_fp8 = input.to(dtype).contiguous()
    layout_params_input = TensorCoreFP8Layout.Params(scale=scale_input, orig_dtype=input_dtype, orig_shape=tuple(input_fp8.shape))
-    quantized_input = QuantizedTensor(input_fp8, TensorCoreFP8Layout, layout_params_input)
+    quantized_input = QuantizedTensor(input_fp8, "TensorCoreFP8Layout", layout_params_input)

    # Wrap weight in QuantizedTensor - this enables unified dispatch
    # Call F.linear - __torch_dispatch__ routes to fp8_linear handler in quant_ops.py!
    layout_params_weight = TensorCoreFP8Layout.Params(scale=scale_weight, orig_dtype=input_dtype, orig_shape=tuple(w.shape))
-    quantized_weight = QuantizedTensor(w, TensorCoreFP8Layout, layout_params_weight)
+    quantized_weight = QuantizedTensor(w, "TensorCoreFP8Layout", layout_params_weight)
    o = torch.nn.functional.linear(quantized_input, quantized_weight, bias)

    uncast_bias_weight(self, w, bias, offload_stream)
@ -493,11 +493,12 @@ from .quant_ops import (
 )


-def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_precision_mm=False):
+def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_precision_mm=False, disabled=[]):
    class MixedPrecisionOps(manual_cast):
        _quant_config = quant_config
        _compute_dtype = compute_dtype
        _full_precision_mm = full_precision_mm
+        _disabled = disabled

        class Linear(torch.nn.Module, CastWeightBiasOp):
            def __init__(
@ -522,6 +523,7 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec

                self.tensor_class = None
                self._full_precision_mm = MixedPrecisionOps._full_precision_mm
+                self._full_precision_mm_config = False

            def reset_parameters(self):
                return None
@ -556,8 +558,12 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec
                    self.weight = torch.nn.Parameter(weight.to(device=device, dtype=MixedPrecisionOps._compute_dtype), requires_grad=False)
                else:
                    self.quant_format = layer_conf.get("format", None)
+                    self._full_precision_mm_config = layer_conf.get("full_precision_matrix_mult", False)
                    if not self._full_precision_mm:
-                        self._full_precision_mm = layer_conf.get("full_precision_matrix_mult", False)
+                        self._full_precision_mm = self._full_precision_mm_config
+
+                    if self.quant_format in MixedPrecisionOps._disabled:
+                        self._full_precision_mm = True

                    if self.quant_format is None:
                        raise ValueError(f"Unknown quantization format for layer {layer_name}")
@ -630,7 +636,7 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec
                        sd["{}weight_scale".format(prefix)] = self.weight._params.block_scale

                    quant_conf = {"format": self.quant_format}
-                    if self._full_precision_mm:
+                    if self._full_precision_mm_config:
                        quant_conf["full_precision_matrix_mult"] = True
                    sd["{}comfy_quant".format(prefix)] = torch.tensor(list(json.dumps(quant_conf).encode('utf-8')), dtype=torch.uint8)
                return sd
@ -648,29 +654,29 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec
                run_every_op()

                input_shape = input.shape
-                tensor_3d = input.ndim == 3
-
-                if self._full_precision_mm or self.comfy_cast_weights or len(self.weight_function) > 0 or len(self.bias_function) > 0:
-                    return self.forward_comfy_cast_weights(input, *args, **kwargs)
+                reshaped_3d = False

                if (getattr(self, 'layout_type', None) is not None and
-                    not isinstance(input, QuantizedTensor)):
+                    not isinstance(input, QuantizedTensor) and not self._full_precision_mm and
+                    not getattr(self, 'comfy_force_cast_weights', False) and
+                    len(self.weight_function) == 0 and len(self.bias_function) == 0):

                    # Reshape 3D tensors to 2D for quantization (needed for NVFP4 and others)
-                    if tensor_3d:
-                        input = input.reshape(-1, input_shape[2])
+                    input_reshaped = input.reshape(-1, input_shape[2]) if input.ndim == 3 else input

-                    if input.ndim != 2:
-                        # Fall back to comfy_cast_weights for non-2D tensors
-                        return self.forward_comfy_cast_weights(input.reshape(input_shape), *args, **kwargs)
+                    # Fall back to non-quantized for non-2D tensors
+                    if input_reshaped.ndim == 2:
+                        reshaped_3d = input.ndim == 3
+                        # dtype is now implicit in the layout class
+                        scale = getattr(self, 'input_scale', None)
+                        if scale is not None:
+                            scale = comfy.model_management.cast_to_device(scale, input.device, None)
+                        input = QuantizedTensor.from_float(input_reshaped, self.layout_type, scale=scale)

-                    # dtype is now implicit in the layout class
-                    input = QuantizedTensor.from_float(input, self.layout_type, scale=getattr(self, 'input_scale', None))
-
-                output = self._forward(input, self.weight, self.bias)
+                output = self.forward_comfy_cast_weights(input)

                # Reshape output back to 3D if input was 3D
-                if tensor_3d:
+                if reshaped_3d:
                    output = output.reshape((input_shape[0], input_shape[1], self.weight.shape[0]))

                return output
@ -711,10 +717,17 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec

 def pick_operations(weight_dtype, compute_dtype, load_device=None, disable_fast_fp8=False, fp8_optimizations=False, model_config=None):
    fp8_compute = comfy.model_management.supports_fp8_compute(load_device) # TODO: if we support more ops this needs to be more granular
+    nvfp4_compute = comfy.model_management.supports_nvfp4_compute(load_device)

    if model_config and hasattr(model_config, 'quant_config') and model_config.quant_config:
        logging.info("Using mixed precision operations")
-        return mixed_precision_ops(model_config.quant_config, compute_dtype, full_precision_mm=not fp8_compute)
+        disabled = set()
+        if not nvfp4_compute:
+            disabled.add("nvfp4")
+        if not fp8_compute:
+            disabled.add("float8_e4m3fn")
+            disabled.add("float8_e5m2")
+        return mixed_precision_ops(model_config.quant_config, compute_dtype, disabled=disabled)

    if (
        fp8_compute and
--- a/comfy/quant_ops.py
+++ b/comfy/quant_ops.py
@ -13,6 +13,14 @@ try:
        get_layout_class,
    )
    _CK_AVAILABLE = True
+    if torch.version.cuda is None:
+        ck.registry.disable("cuda")
+    else:
+        cuda_version = tuple(map(int, str(torch.version.cuda).split('.')))
+        if cuda_version < (13,):
+            ck.registry.disable("cuda")
+            logging.warning("WARNING: You need pytorch with cu130 or higher to use optimized CUDA operations.")
+
    ck.registry.disable("triton")
    for k, v in ck.list_backends().items():
        logging.info(f"Found comfy_kitchen backend {k}: {v}")
--- a/comfy/sd.py
+++ b/comfy/sd.py
@ -218,7 +218,7 @@ class CLIP:
            if unprojected:
                self.cond_stage_model.set_clip_options({"projected_pooled": False})

-            self.load_model()
+            self.load_model(tokens)
            self.cond_stage_model.set_clip_options({"execution_device": self.patcher.load_device})
            all_hooks.reset()
            self.patcher.patch_hooks(None)
@ -266,7 +266,7 @@ class CLIP:
        if return_pooled == "unprojected":
            self.cond_stage_model.set_clip_options({"projected_pooled": False})

-        self.load_model()
+        self.load_model(tokens)
        self.cond_stage_model.set_clip_options({"execution_device": self.patcher.load_device})
        o = self.cond_stage_model.encode_token_weights(tokens)
        cond, pooled = o[:2]
@ -299,8 +299,11 @@ class CLIP:
            sd_clip[k] = sd_tokenizer[k]
        return sd_clip

-    def load_model(self):
-        model_management.load_model_gpu(self.patcher)
+    def load_model(self, tokens={}):
+        memory_used = 0
+        if hasattr(self.cond_stage_model, "memory_estimation_function"):
+            memory_used = self.cond_stage_model.memory_estimation_function(tokens, device=self.patcher.load_device)
+        model_management.load_models_gpu([self.patcher], memory_required=memory_used)
        return self.patcher

    def get_key_patches(self):
@ -476,8 +479,8 @@ class VAE:
                self.first_stage_model = comfy.ldm.lightricks.vae.causal_video_autoencoder.VideoVAE(version=version, config=vae_config)
                self.latent_channels = 128
                self.latent_dim = 3
-                self.memory_used_decode = lambda shape, dtype: (900 * shape[2] * shape[3] * shape[4] * (8 * 8 * 8)) * model_management.dtype_size(dtype)
-                self.memory_used_encode = lambda shape, dtype: (70 * max(shape[2], 7) * shape[3] * shape[4]) * model_management.dtype_size(dtype)
+                self.memory_used_decode = lambda shape, dtype: (1200 * shape[2] * shape[3] * shape[4] * (8 * 8 * 8)) * model_management.dtype_size(dtype)
+                self.memory_used_encode = lambda shape, dtype: (80 * max(shape[2], 7) * shape[3] * shape[4]) * model_management.dtype_size(dtype)
                self.upscale_ratio = (lambda a: max(0, a * 8 - 7), 32, 32)
                self.upscale_index_formula = (8, 32, 32)
                self.downscale_ratio = (lambda a: max(0, math.floor((a + 7) / 8)), 32, 32)
--- a/comfy/supported_models.py
+++ b/comfy/supported_models.py
@ -845,7 +845,7 @@ class LTXAV(LTXV):

    def __init__(self, unet_config):
        super().__init__(unet_config)
-        self.memory_usage_factor = 0.055  # TODO
+        self.memory_usage_factor = 0.061  # TODO

    def get_model(self, state_dict, prefix="", device=None):
        out = model_base.LTXAV(self, device=device)
--- a/comfy/text_encoders/lt.py
+++ b/comfy/text_encoders/lt.py
@ -36,10 +36,10 @@ class LTXAVGemmaTokenizer(sd1_clip.SD1Tokenizer):

 class Gemma3_12BModel(sd1_clip.SDClipModel):
    def __init__(self, device="cpu", layer="all", layer_idx=None, dtype=None, attention_mask=True, model_options={}):
-        llama_scaled_fp8 = model_options.get("gemma_scaled_fp8", None)
-        if llama_scaled_fp8 is not None:
+        llama_quantization_metadata = model_options.get("llama_quantization_metadata", None)
+        if llama_quantization_metadata is not None:
            model_options = model_options.copy()
-            model_options["scaled_fp8"] = llama_scaled_fp8
+            model_options["quantization_metadata"] = llama_quantization_metadata

        super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"start": 2, "pad": 0}, layer_norm_hidden_state=False, model_class=comfy.text_encoders.llama.Gemma3_12B, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options)

@ -86,20 +86,25 @@ class LTXAVTEModel(torch.nn.Module):
        )

    def set_clip_options(self, options):
+        self.execution_device = options.get("execution_device", self.execution_device)
        self.gemma3_12b.set_clip_options(options)

    def reset_clip_options(self):
        self.gemma3_12b.reset_clip_options()
+        self.execution_device = None

    def encode_token_weights(self, token_weight_pairs):
        token_weight_pairs = token_weight_pairs["gemma3_12b"]

        out, pooled, extra = self.gemma3_12b.encode_token_weights(token_weight_pairs)
        out_device = out.device
-        out = out.movedim(1, -1).to(self.text_embedding_projection.weight.device)
+        if comfy.model_management.should_use_bf16(self.execution_device):
+            out = out.to(device=self.execution_device, dtype=torch.bfloat16)
+        out = out.movedim(1, -1).to(self.execution_device)
        out = 8.0 * (out - out.mean(dim=(1, 2), keepdim=True)) / (out.amax(dim=(1, 2), keepdim=True) - out.amin(dim=(1, 2), keepdim=True) + 1e-6)
        out = out.reshape((out.shape[0], out.shape[1], -1))
        out = self.text_embedding_projection(out)
+        out = out.float()
        out_vid = self.video_embeddings_connector(out)[0]
        out_audio = self.audio_embeddings_connector(out)[0]
        out = torch.concat((out_vid, out_audio), dim=-1)
@ -116,13 +121,21 @@ class LTXAVTEModel(torch.nn.Module):

            return self.load_state_dict(sdo, strict=False)

+    def memory_estimation_function(self, token_weight_pairs, device=None):
+        constant = 6.0
+        if comfy.model_management.should_use_bf16(device):
+            constant /= 2.0

-def ltxav_te(dtype_llama=None, llama_scaled_fp8=None):
+        token_weight_pairs = token_weight_pairs.get("gemma3_12b", [])
+        num_tokens = sum(map(lambda a: len(a), token_weight_pairs))
+        return num_tokens * constant * 1024 * 1024
+
+def ltxav_te(dtype_llama=None, llama_quantization_metadata=None):
    class LTXAVTEModel_(LTXAVTEModel):
        def __init__(self, device="cpu", dtype=None, model_options={}):
-            if llama_scaled_fp8 is not None and "llama_scaled_fp8" not in model_options:
+            if llama_quantization_metadata is not None:
                model_options = model_options.copy()
-                model_options["llama_scaled_fp8"] = llama_scaled_fp8
+                model_options["llama_quantization_metadata"] = llama_quantization_metadata
            if dtype_llama is not None:
                dtype = dtype_llama
            super().__init__(dtype_llama=dtype_llama, device=device, dtype=dtype, model_options=model_options)
--- a/comfy_api_nodes/nodes_wan.py
+++ b/comfy_api_nodes/nodes_wan.py
@ -13,7 +13,9 @@ from comfy_api_nodes.util import (
    poll_op,
    sync_op,
    tensor_to_base64_string,
+    upload_video_to_comfyapi,
    validate_audio_duration,
+    validate_video_duration,
 )


@ -41,6 +43,12 @@ class Image2VideoInputField(BaseModel):
    audio_url: str | None = Field(None)


+class Reference2VideoInputField(BaseModel):
+    prompt: str = Field(...)
+    negative_prompt: str | None = Field(None)
+    reference_video_urls: list[str] = Field(...)
+
+
 class Txt2ImageParametersField(BaseModel):
    size: str = Field(...)
    n: int = Field(1, description="Number of images to generate.")  # we support only value=1
@ -76,6 +84,14 @@ class Image2VideoParametersField(BaseModel):
    shot_type: str = Field("single")


+class Reference2VideoParametersField(BaseModel):
+    size: str = Field(...)
+    duration: int = Field(5, ge=5, le=15)
+    shot_type: str = Field("single")
+    seed: int = Field(..., ge=0, le=2147483647)
+    watermark: bool = Field(False)
+
+
 class Text2ImageTaskCreationRequest(BaseModel):
    model: str = Field(...)
    input: Text2ImageInputField = Field(...)
@ -100,6 +116,12 @@ class Image2VideoTaskCreationRequest(BaseModel):
    parameters: Image2VideoParametersField = Field(...)


+class Reference2VideoTaskCreationRequest(BaseModel):
+    model: str = Field(...)
+    input: Reference2VideoInputField = Field(...)
+    parameters: Reference2VideoParametersField = Field(...)
+
+
 class TaskCreationOutputField(BaseModel):
    task_id: str = Field(...)
    task_status: str = Field(...)
@ -721,6 +743,143 @@ class WanImageToVideoApi(IO.ComfyNode):
        return IO.NodeOutput(await download_url_to_video_output(response.output.video_url))


+class WanReferenceVideoApi(IO.ComfyNode):
+    @classmethod
+    def define_schema(cls):
+        return IO.Schema(
+            node_id="WanReferenceVideoApi",
+            display_name="Wan Reference to Video",
+            category="api node/video/Wan",
+            description="Use the character and voice from input videos, combined with a prompt, "
+            "to generate a new video that maintains character consistency.",
+            inputs=[
+                IO.Combo.Input("model", options=["wan2.6-r2v"]),
+                IO.String.Input(
+                    "prompt",
+                    multiline=True,
+                    default="",
+                    tooltip="Prompt describing the elements and visual features. Supports English and Chinese. "
+                    "Use identifiers such as `character1` and `character2` to refer to the reference characters.",
+                ),
+                IO.String.Input(
+                    "negative_prompt",
+                    multiline=True,
+                    default="",
+                    tooltip="Negative prompt describing what to avoid.",
+                ),
+                IO.Autogrow.Input(
+                    "reference_videos",
+                    template=IO.Autogrow.TemplateNames(
+                        IO.Video.Input("reference_video"),
+                        names=["character1", "character2", "character3"],
+                        min=1,
+                    ),
+                ),
+                IO.Combo.Input(
+                    "size",
+                    options=[
+                        "720p: 1:1 (960x960)",
+                        "720p: 16:9 (1280x720)",
+                        "720p: 9:16 (720x1280)",
+                        "720p: 4:3 (1088x832)",
+                        "720p: 3:4 (832x1088)",
+                        "1080p: 1:1 (1440x1440)",
+                        "1080p: 16:9 (1920x1080)",
+                        "1080p: 9:16 (1080x1920)",
+                        "1080p: 4:3 (1632x1248)",
+                        "1080p: 3:4 (1248x1632)",
+                    ],
+                ),
+                IO.Int.Input(
+                    "duration",
+                    default=5,
+                    min=5,
+                    max=10,
+                    step=5,
+                    display_mode=IO.NumberDisplay.slider,
+                ),
+                IO.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=2147483647,
+                    step=1,
+                    display_mode=IO.NumberDisplay.number,
+                    control_after_generate=True,
+                ),
+                IO.Combo.Input(
+                    "shot_type",
+                    options=["single", "multi"],
+                    tooltip="Specifies the shot type for the generated video, that is, whether the video is a "
+                    "single continuous shot or multiple shots with cuts.",
+                ),
+                IO.Boolean.Input(
+                    "watermark",
+                    default=False,
+                    tooltip="Whether to add an AI-generated watermark to the result.",
+                ),
+            ],
+            outputs=[
+                IO.Video.Output(),
+            ],
+            hidden=[
+                IO.Hidden.auth_token_comfy_org,
+                IO.Hidden.api_key_comfy_org,
+                IO.Hidden.unique_id,
+            ],
+            is_api_node=True,
+        )
+
+    @classmethod
+    async def execute(
+        cls,
+        model: str,
+        prompt: str,
+        negative_prompt: str,
+        reference_videos: IO.Autogrow.Type,
+        size: str,
+        duration: int,
+        seed: int,
+        shot_type: str,
+        watermark: bool,
+    ):
+        reference_video_urls = []
+        for i in reference_videos:
+            validate_video_duration(reference_videos[i], min_duration=2, max_duration=30)
+        for i in reference_videos:
+            reference_video_urls.append(await upload_video_to_comfyapi(cls, reference_videos[i]))
+        width, height = RES_IN_PARENS.search(size).groups()
+        initial_response = await sync_op(
+            cls,
+            ApiEndpoint(path="/proxy/wan/api/v1/services/aigc/video-generation/video-synthesis", method="POST"),
+            response_model=TaskCreationResponse,
+            data=Reference2VideoTaskCreationRequest(
+                model=model,
+                input=Reference2VideoInputField(
+                    prompt=prompt, negative_prompt=negative_prompt, reference_video_urls=reference_video_urls
+                ),
+                parameters=Reference2VideoParametersField(
+                    size=f"{width}*{height}",
+                    duration=duration,
+                    shot_type=shot_type,
+                    watermark=watermark,
+                    seed=seed,
+                ),
+            ),
+        )
+        if not initial_response.output:
+            raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}")
+        response = await poll_op(
+            cls,
+            ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"),
+            response_model=VideoTaskStatusResponse,
+            status_extractor=lambda x: x.output.task_status,
+            poll_interval=6,
+            max_poll_attempts=280,
+        )
+        return IO.NodeOutput(await download_url_to_video_output(response.output.video_url))
+
+
 class WanApiExtension(ComfyExtension):
    @override
    async def get_node_list(self) -> list[type[IO.ComfyNode]]:
@ -729,6 +888,7 @@ class WanApiExtension(ComfyExtension):
            WanImageToImageApi,
            WanTextToVideoApi,
            WanImageToVideoApi,
+            WanReferenceVideoApi,
        ]


--- a/comfy_api_nodes/util/upload_helpers.py
+++ b/comfy_api_nodes/util/upload_helpers.py
@ -119,7 +119,7 @@ async def upload_video_to_comfyapi(
            raise ValueError(f"Could not verify video duration from source: {e}") from e

    upload_mime_type = f"video/{container.value.lower()}"
-    filename = f"uploaded_video.{container.value.lower()}"
+    filename = f"{uuid.uuid4()}.{container.value.lower()}"

    # Convert VideoInput to BytesIO using specified container/codec
    video_bytes_io = BytesIO()
--- a/comfy_extras/nodes_lt_audio.py
+++ b/comfy_extras/nodes_lt_audio.py
@ -185,6 +185,10 @@ class LTXAVTextEncoderLoader(io.ComfyNode):
                io.Combo.Input(
                    "ckpt_name",
                    options=folder_paths.get_filename_list("checkpoints"),
+                ),
+                io.Combo.Input(
+                    "device",
+                    options=["default", "cpu"],
                )
            ],
            outputs=[io.Clip.Output()],
@ -197,7 +201,11 @@ class LTXAVTextEncoderLoader(io.ComfyNode):
        clip_path1 = folder_paths.get_full_path_or_raise("text_encoders", text_encoder)
        clip_path2 = folder_paths.get_full_path_or_raise("checkpoints", ckpt_name)

-        clip = comfy.sd.load_clip(ckpt_paths=[clip_path1, clip_path2], embedding_directory=folder_paths.get_folder_paths("embeddings"), clip_type=clip_type)
+        model_options = {}
+        if device == "cpu":
+            model_options["load_device"] = model_options["offload_device"] = torch.device("cpu")
+
+        clip = comfy.sd.load_clip(ckpt_paths=[clip_path1, clip_path2], embedding_directory=folder_paths.get_folder_paths("embeddings"), clip_type=clip_type, model_options=model_options)
        return io.NodeOutput(clip)


--- a/comfyui_version.py
+++ b/comfyui_version.py
@ -1,3 +1,3 @@
 # This file is automatically generated by the build process when version is
 # updated in pyproject.toml.
-__version__ = "0.7.0"
+__version__ = "0.8.2"
--- a/manager_requirements.txt
+++ b/manager_requirements.txt
@ -1 +1 @@
-comfyui_manager==4.0.4
+comfyui_manager==4.0.5
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,6 +1,6 @@
 [project]
 name = "ComfyUI"
-version = "0.7.0"
+version = "0.8.2"
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.10"
--- a/requirements.txt
+++ b/requirements.txt
@ -1,5 +1,5 @@
 comfyui-frontend-package==1.35.9
-comfyui-workflow-templates==0.7.66
+comfyui-workflow-templates==0.7.69
 comfyui-embedded-docs==0.3.1
 torch
 torchsde
@ -21,7 +21,7 @@ psutil
 alembic
 SQLAlchemy
 av>=14.2.0
-comfy-kitchen>=0.2.0
+comfy-kitchen>=0.2.5

 #non essential dependencies:
 kornia>=0.7.1
Author	SHA1	Message	Date
Bernhard Frauendienst	02ca4b610a	Merge `dac0710c88` into `5943fbf457`	2026-01-09 00:34:45 +03:00
Dr.Lt.Data	5943fbf457	bump comfyui_manager version to the 4.0.5 (#11732 ) Some checks are pending Python Linting / Run Ruff (push) Waiting to run Details Python Linting / Run Pylint (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Waiting to run Details Execution Tests / test (macos-latest) (push) Waiting to run Details Execution Tests / test (ubuntu-latest) (push) Waiting to run Details Execution Tests / test (windows-latest) (push) Waiting to run Details Test server launches without errors / test (push) Waiting to run Details Unit Tests / test (macos-latest) (push) Waiting to run Details Unit Tests / test (ubuntu-latest) (push) Waiting to run Details Unit Tests / test (windows-2022) (push) Waiting to run Details	2026-01-08 08:15:42 -08:00
Yoland Yan	a60b7b86c5	Revert "Force sequential execution in CI test jobs (#11687 )" (#11725 ) Some checks are pending Python Linting / Run Ruff (push) Waiting to run Details Python Linting / Run Pylint (push) Waiting to run Details Execution Tests / test (macos-latest) (push) Waiting to run Details Execution Tests / test (ubuntu-latest) (push) Waiting to run Details Execution Tests / test (windows-latest) (push) Waiting to run Details Test server launches without errors / test (push) Waiting to run Details Unit Tests / test (macos-latest) (push) Waiting to run Details Unit Tests / test (ubuntu-latest) (push) Waiting to run Details Unit Tests / test (windows-2022) (push) Waiting to run Details This reverts commit `ce0000c4f2`.	2026-01-07 21:41:57 -08:00
comfyanonymous	2e9d51680a	ComfyUI version v0.8.2 Some checks are pending Python Linting / Run Ruff (push) Waiting to run Details Python Linting / Run Pylint (push) Waiting to run Details Build package / Build Test (3.10) (push) Waiting to run Details Build package / Build Test (3.11) (push) Waiting to run Details Build package / Build Test (3.12) (push) Waiting to run Details Build package / Build Test (3.13) (push) Waiting to run Details Build package / Build Test (3.14) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Waiting to run Details Execution Tests / test (macos-latest) (push) Waiting to run Details Execution Tests / test (ubuntu-latest) (push) Waiting to run Details Execution Tests / test (windows-latest) (push) Waiting to run Details Test server launches without errors / test (push) Waiting to run Details Unit Tests / test (macos-latest) (push) Waiting to run Details Unit Tests / test (ubuntu-latest) (push) Waiting to run Details Unit Tests / test (windows-2022) (push) Waiting to run Details	2026-01-07 23:50:02 -05:00
comfyanonymous	50d6e1caf4	Tweak ltxv vae mem estimation. (#11722 )	2026-01-07 23:07:05 -05:00
comfyanonymous	ac12f77bed	ComfyUI version v0.8.1	2026-01-07 22:10:08 -05:00
ComfyUI Wiki	fcd9a236b0	Update template to 0.7.69 (#11719 )	2026-01-07 18:22:23 -08:00
comfyanonymous	21e8425087	Add warning for old pytorch. (#11718 )	2026-01-07 21:07:26 -05:00
rattus	b6c79a648a	ops: Fix offloading with FP8MM performance (#11697 ) This logic was checking comfy_cast_weights, and going straight to to the forward_comfy_cast_weights implementation without attempting to downscale input to fp8 in the event comfy_cast_weights is set. The main reason comfy_cast_weights would be set would be for async offload, which is not a good reason to nix FP8MM. So instead, and together the underlying exclusions for FP8MM which are: * having a weight_function (usually LowVramPatch) * force_cast_weights (compute dtype override) * the weight is not Quantized * the input is already quantized * the model or layer has MM explictily disabled. If you get past all of those exclusions, quantize the input tensor. Then hand the new input, quantized or not off to forward_comfy_cast_weights to handle it. If the weight is offloaded but input is quantized you will get an offloaded MM8.	2026-01-07 21:01:16 -05:00
comfyanonymous	25bc1b5b57	Add memory estimation function to ltxav text encoder. (#11716 )	2026-01-07 20:11:22 -05:00
comfyanonymous	3cd19e99c1	Increase ltxav mem estimation by a bit. (#11715 )	2026-01-07 20:04:56 -05:00
comfyanonymous	007b87e7ac	Bump required comfy-kitchen version. (#11714 )	2026-01-07 19:48:47 -05:00
comfyanonymous	34751fe9f9	Lower ltxv text encoder vram use. (#11713 )	2026-01-07 19:12:15 -05:00
Jukka Seppänen	1c705f7bfb	Add device selection for LTXAVTextEncoderLoader (#11700 )	2026-01-07 18:39:59 -05:00
rattus	48e5ea1dfd	model_patcher: Remove confusing load stat (#11710 ) If the loader passes 1e32 as the usable memory size, it means force the full load. This happens with CPU loads and a few other misc cases. Removing the confusing number and just leave the other details.	2026-01-07 18:39:20 -05:00
comfyanonymous	3cd7b32f1b	Support gemma 12B with quant weights. (#11696 ) Some checks are pending Python Linting / Run Ruff (push) Waiting to run Details Python Linting / Run Pylint (push) Waiting to run Details Build package / Build Test (3.10) (push) Waiting to run Details Build package / Build Test (3.11) (push) Waiting to run Details Build package / Build Test (3.12) (push) Waiting to run Details Build package / Build Test (3.13) (push) Waiting to run Details Build package / Build Test (3.14) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Waiting to run Details Execution Tests / test (macos-latest) (push) Waiting to run Details Execution Tests / test (ubuntu-latest) (push) Waiting to run Details Execution Tests / test (windows-latest) (push) Waiting to run Details Test server launches without errors / test (push) Waiting to run Details Unit Tests / test (macos-latest) (push) Waiting to run Details Unit Tests / test (ubuntu-latest) (push) Waiting to run Details Unit Tests / test (windows-2022) (push) Waiting to run Details	2026-01-07 05:15:14 -05:00
comfyanonymous	c0c9720d77	Fix stable release workflow not pulling latest comfy kitchen. (#11695 )	2026-01-07 04:48:28 -05:00
comfyanonymous	fc0cb10bcb	ComfyUI v0.8.0	2026-01-07 04:07:31 -05:00
comfyanonymous	b7d7cc1d49	Fix fp8 fast issue. (#11688 )	2026-01-07 01:39:06 -05:00
Alexander Piskun	79e94544bd	feat(api-nodes): add WAN2.6 ReferenceToVideo (#11644 )	2026-01-06 22:04:50 -08:00
Yoland Yan	ce0000c4f2	Force sequential execution in CI test jobs (#11687 ) Added max-parallel setting to enforce sequential execution in test jobs.	2026-01-07 00:57:31 -05:00
comfyanonymous	c5cfb34c07	Update comfy-kitchen version to 0.2.3 (#11685 )	2026-01-06 23:51:45 -05:00
comfyanonymous	edee33f55e	Disable comfy kitchen cuda if pytorch cuda less than 13 (#11681 ) Some checks are pending Python Linting / Run Ruff (push) Waiting to run Details Python Linting / Run Pylint (push) Waiting to run Details Build package / Build Test (3.10) (push) Waiting to run Details Build package / Build Test (3.11) (push) Waiting to run Details Build package / Build Test (3.12) (push) Waiting to run Details Build package / Build Test (3.13) (push) Waiting to run Details Build package / Build Test (3.14) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Waiting to run Details Execution Tests / test (macos-latest) (push) Waiting to run Details Execution Tests / test (ubuntu-latest) (push) Waiting to run Details Execution Tests / test (windows-latest) (push) Waiting to run Details Test server launches without errors / test (push) Waiting to run Details Unit Tests / test (macos-latest) (push) Waiting to run Details Unit Tests / test (ubuntu-latest) (push) Waiting to run Details Unit Tests / test (windows-2022) (push) Waiting to run Details	2026-01-06 22:13:43 -05:00
comfyanonymous	2c03884f5f	Skip fp4 matrix mult on devices that don't support it. (#11677 )	2026-01-06 18:07:26 -05:00
comfyanonymous	6e9ee55cdd	Disable ltxav previews. (#11676 )	2026-01-06 17:41:27 -05:00
comfyanonymous	023cf13721	Fix lowvram issue with ltxv2 text encoder. (#11675 )	2026-01-06 17:33:03 -05:00
ComfyUI Wiki	c3566c0d76	chore: update workflow templates to v0.7.67 (#11667 )	2026-01-06 14:28:29 -08:00
comfyanonymous	c3c3e93c5b	Use rope functions from comfy kitchen. (#11674 )	2026-01-06 16:57:50 -05:00
comfyanonymous	6ffc159bdd	Update comfy-kitchen version to 0.2.1 (#11672 )	2026-01-06 15:53:43 -05:00
comfyanonymous	96e0d0924e	Add helpful message to portable. (#11671 ) Some checks are pending Python Linting / Run Ruff (push) Waiting to run Details Python Linting / Run Pylint (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Waiting to run Details Execution Tests / test (macos-latest) (push) Waiting to run Details Execution Tests / test (ubuntu-latest) (push) Waiting to run Details Execution Tests / test (windows-latest) (push) Waiting to run Details Test server launches without errors / test (push) Waiting to run Details Unit Tests / test (macos-latest) (push) Waiting to run Details Unit Tests / test (ubuntu-latest) (push) Waiting to run Details Unit Tests / test (windows-2022) (push) Waiting to run Details	2026-01-06 14:43:24 -05:00