Merge dac0710c88 into e14f3b6610

2026-01-17 01:30:50 +08:00 · 2026-01-06 13:12:55 -06:00
21 changed files with 58 additions and 284 deletions
--- a/.ci/windows_nvidia_base_files/advanced/run_nvidia_gpu_disable_api_nodes.bat
+++ b/.ci/windows_nvidia_base_files/advanced/run_nvidia_gpu_disable_api_nodes.bat
@ -1,3 +1,3 @@
 ..\python_embeded\python.exe -s ..\ComfyUI\main.py --windows-standalone-build --disable-api-nodes
-echo If you see this and ComfyUI did not start try updating your Nvidia Drivers to the latest. If you get a c10.dll error you need to install vc redist that you can find: https://aka.ms/vc14/vc_redist.x64.exe
+echo If you see this and ComfyUI did not start try updating your Nvidia Drivers to the latest.
 pause
--- a/.ci/windows_nvidia_base_files/run_nvidia_gpu.bat
+++ b/.ci/windows_nvidia_base_files/run_nvidia_gpu.bat
@ -1,3 +1,3 @@
 .\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build
-echo If you see this and ComfyUI did not start try updating your Nvidia Drivers to the latest. If you get a c10.dll error you need to install vc redist that you can find: https://aka.ms/vc14/vc_redist.x64.exe
+echo If you see this and ComfyUI did not start try updating your Nvidia Drivers to the latest.
 pause
--- a/.ci/windows_nvidia_base_files/run_nvidia_gpu_fast_fp16_accumulation.bat
+++ b/.ci/windows_nvidia_base_files/run_nvidia_gpu_fast_fp16_accumulation.bat
@ -1,3 +1,3 @@
 .\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build --fast fp16_accumulation
-echo If you see this and ComfyUI did not start try updating your Nvidia Drivers to the latest. If you get a c10.dll error you need to install vc redist that you can find: https://aka.ms/vc14/vc_redist.x64.exe
+echo If you see this and ComfyUI did not start try updating your Nvidia Drivers to the latest.
 pause
--- a/.github/workflows/stable-release.yml
+++ b/.github/workflows/stable-release.yml
@ -117,7 +117,7 @@ jobs:
          ./python.exe get-pip.py
          ./python.exe -s -m pip install ../${{ inputs.cache_tag }}_python_deps/*

-          grep comfy ../ComfyUI/requirements.txt > ./requirements_comfyui.txt
+          grep comfyui ../ComfyUI/requirements.txt > ./requirements_comfyui.txt
          ./python.exe -s -m pip install -r requirements_comfyui.txt
          rm requirements_comfyui.txt

--- a/comfy/latent_formats.py
+++ b/comfy/latent_formats.py
@ -408,9 +408,7 @@ class LTXV(LatentFormat):
        self.latent_rgb_factors_bias = [-0.0571, -0.1657, -0.2512]

 class LTXAV(LTXV):
-    def __init__(self):
-        self.latent_rgb_factors = None
-        self.latent_rgb_factors_bias = None
+    pass

 class HunyuanVideo(LatentFormat):
    latent_channels = 16
--- a/comfy/ldm/flux/math.py
+++ b/comfy/ldm/flux/math.py
@ -4,7 +4,6 @@ from torch import Tensor

 from comfy.ldm.modules.attention import optimized_attention
 import comfy.model_management
-import logging


 def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor, mask=None, transformer_options={}) -> Tensor:
@ -14,6 +13,7 @@ def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor, mask=None, transforme
    x = optimized_attention(q, k, v, heads, skip_reshape=True, mask=mask, transformer_options=transformer_options)
    return x

+
 def rope(pos: Tensor, dim: int, theta: int) -> Tensor:
    assert dim % 2 == 0
    if comfy.model_management.is_device_mps(pos.device) or comfy.model_management.is_intel_xpu() or comfy.model_management.is_directml_enabled():
@ -28,20 +28,13 @@ def rope(pos: Tensor, dim: int, theta: int) -> Tensor:
    out = rearrange(out, "b n d (i j) -> b n d i j", i=2, j=2)
    return out.to(dtype=torch.float32, device=pos.device)

+def apply_rope1(x: Tensor, freqs_cis: Tensor):
+    x_ = x.to(dtype=freqs_cis.dtype).reshape(*x.shape[:-1], -1, 1, 2)

-try:
-    import comfy.quant_ops
-    apply_rope = comfy.quant_ops.ck.apply_rope
-    apply_rope1 = comfy.quant_ops.ck.apply_rope1
-except:
-    logging.warning("No comfy kitchen, using old apply_rope functions.")
-    def apply_rope1(x: Tensor, freqs_cis: Tensor):
-        x_ = x.to(dtype=freqs_cis.dtype).reshape(*x.shape[:-1], -1, 1, 2)
+    x_out = freqs_cis[..., 0] * x_[..., 0]
+    x_out.addcmul_(freqs_cis[..., 1], x_[..., 1])

-        x_out = freqs_cis[..., 0] * x_[..., 0]
-        x_out.addcmul_(freqs_cis[..., 1], x_[..., 1])
+    return x_out.reshape(*x.shape).type_as(x)

-        return x_out.reshape(*x.shape).type_as(x)
-
-    def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor):
-        return apply_rope1(xq, freqs_cis), apply_rope1(xk, freqs_cis)
+def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor):
+    return apply_rope1(xq, freqs_cis), apply_rope1(xk, freqs_cis)
--- a/comfy/ldm/lightricks/embeddings_connector.py
+++ b/comfy/ldm/lightricks/embeddings_connector.py
@ -276,7 +276,7 @@ class Embeddings1DConnector(nn.Module):
                max(1024, hidden_states.shape[1]) / self.num_learnable_registers
            )
            learnable_registers = torch.tile(
-                self.learnable_registers.to(hidden_states), (num_registers_duplications, 1)
+                self.learnable_registers, (num_registers_duplications, 1)
            )

            hidden_states = torch.cat((hidden_states, learnable_registers[hidden_states.shape[1]:].unsqueeze(0).repeat(hidden_states.shape[0], 1, 1)), dim=1)
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@ -1504,16 +1504,6 @@ def supports_fp8_compute(device=None):

    return True

-def supports_nvfp4_compute(device=None):
-    if not is_nvidia():
-        return False
-
-    props = torch.cuda.get_device_properties(device)
-    if props.major < 10:
-        return False
-
-    return True
-
 def extended_fp16_support():
    # TODO: check why some models work with fp16 on newer torch versions but not on older
    if torch_version_numeric < (2, 7):
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@ -718,7 +718,6 @@ class ModelPatcher:
                            continue

                cast_weight = self.force_cast_weights
-                m.comfy_force_cast_weights = self.force_cast_weights
                if lowvram_weight:
                    if hasattr(m, "comfy_cast_weights"):
                        m.weight_function = []
@ -791,12 +790,11 @@ class ModelPatcher:
                for param in params:
                    self.pin_weight_to_device("{}.{}".format(n, param))

-            usable_stat = "{:.2f} MB usable,".format(lowvram_model_memory / (1024 * 1024)) if lowvram_model_memory < 1e32 else ""
            if lowvram_counter > 0:
-                logging.info("loaded partially; {} {:.2f} MB loaded, {:.2f} MB offloaded, {:.2f} MB buffer reserved, lowvram patches: {}".format(usable_stat, mem_counter / (1024 * 1024), lowvram_mem_counter / (1024 * 1024), offload_buffer / (1024 * 1024), patch_counter))
+                logging.info("loaded partially; {:.2f} MB usable, {:.2f} MB loaded, {:.2f} MB offloaded, {:.2f} MB buffer reserved, lowvram patches: {}".format(lowvram_model_memory / (1024 * 1024), mem_counter / (1024 * 1024), lowvram_mem_counter / (1024 * 1024), offload_buffer / (1024 * 1024), patch_counter))
                self.model.model_lowvram = True
            else:
-                logging.info("loaded completely; {} {:.2f} MB loaded, full load: {}".format(usable_stat, mem_counter / (1024 * 1024), full_load))
+                logging.info("loaded completely; {:.2f} MB usable, {:.2f} MB loaded, full load: {}".format(lowvram_model_memory / (1024 * 1024), mem_counter / (1024 * 1024), full_load))
                self.model.model_lowvram = False
                if full_load:
                    self.model.to(device_to)
--- a/comfy/ops.py
+++ b/comfy/ops.py
@ -427,12 +427,12 @@ def fp8_linear(self, input):
    input = torch.clamp(input, min=-448, max=448, out=input)
    input_fp8 = input.to(dtype).contiguous()
    layout_params_input = TensorCoreFP8Layout.Params(scale=scale_input, orig_dtype=input_dtype, orig_shape=tuple(input_fp8.shape))
-    quantized_input = QuantizedTensor(input_fp8, "TensorCoreFP8Layout", layout_params_input)
+    quantized_input = QuantizedTensor(input_fp8, TensorCoreFP8Layout, layout_params_input)

    # Wrap weight in QuantizedTensor - this enables unified dispatch
    # Call F.linear - __torch_dispatch__ routes to fp8_linear handler in quant_ops.py!
    layout_params_weight = TensorCoreFP8Layout.Params(scale=scale_weight, orig_dtype=input_dtype, orig_shape=tuple(w.shape))
-    quantized_weight = QuantizedTensor(w, "TensorCoreFP8Layout", layout_params_weight)
+    quantized_weight = QuantizedTensor(w, TensorCoreFP8Layout, layout_params_weight)
    o = torch.nn.functional.linear(quantized_input, quantized_weight, bias)

    uncast_bias_weight(self, w, bias, offload_stream)
@ -493,12 +493,11 @@ from .quant_ops import (
 )


-def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_precision_mm=False, disabled=[]):
+def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_precision_mm=False):
    class MixedPrecisionOps(manual_cast):
        _quant_config = quant_config
        _compute_dtype = compute_dtype
        _full_precision_mm = full_precision_mm
-        _disabled = disabled

        class Linear(torch.nn.Module, CastWeightBiasOp):
            def __init__(
@ -523,7 +522,6 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec

                self.tensor_class = None
                self._full_precision_mm = MixedPrecisionOps._full_precision_mm
-                self._full_precision_mm_config = False

            def reset_parameters(self):
                return None
@ -558,12 +556,8 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec
                    self.weight = torch.nn.Parameter(weight.to(device=device, dtype=MixedPrecisionOps._compute_dtype), requires_grad=False)
                else:
                    self.quant_format = layer_conf.get("format", None)
-                    self._full_precision_mm_config = layer_conf.get("full_precision_matrix_mult", False)
                    if not self._full_precision_mm:
-                        self._full_precision_mm = self._full_precision_mm_config
-
-                    if self.quant_format in MixedPrecisionOps._disabled:
-                        self._full_precision_mm = True
+                        self._full_precision_mm = layer_conf.get("full_precision_matrix_mult", False)

                    if self.quant_format is None:
                        raise ValueError(f"Unknown quantization format for layer {layer_name}")
@ -636,7 +630,7 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec
                        sd["{}weight_scale".format(prefix)] = self.weight._params.block_scale

                    quant_conf = {"format": self.quant_format}
-                    if self._full_precision_mm_config:
+                    if self._full_precision_mm:
                        quant_conf["full_precision_matrix_mult"] = True
                    sd["{}comfy_quant".format(prefix)] = torch.tensor(list(json.dumps(quant_conf).encode('utf-8')), dtype=torch.uint8)
                return sd
@ -654,29 +648,29 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec
                run_every_op()

                input_shape = input.shape
-                reshaped_3d = False
+                tensor_3d = input.ndim == 3
+
+                if self._full_precision_mm or self.comfy_cast_weights or len(self.weight_function) > 0 or len(self.bias_function) > 0:
+                    return self.forward_comfy_cast_weights(input, *args, **kwargs)

                if (getattr(self, 'layout_type', None) is not None and
-                    not isinstance(input, QuantizedTensor) and not self._full_precision_mm and
-                    not getattr(self, 'comfy_force_cast_weights', False) and
-                    len(self.weight_function) == 0 and len(self.bias_function) == 0):
+                    not isinstance(input, QuantizedTensor)):

                    # Reshape 3D tensors to 2D for quantization (needed for NVFP4 and others)
-                    input_reshaped = input.reshape(-1, input_shape[2]) if input.ndim == 3 else input
+                    if tensor_3d:
+                        input = input.reshape(-1, input_shape[2])

-                    # Fall back to non-quantized for non-2D tensors
-                    if input_reshaped.ndim == 2:
-                        reshaped_3d = input.ndim == 3
-                        # dtype is now implicit in the layout class
-                        scale = getattr(self, 'input_scale', None)
-                        if scale is not None:
-                            scale = comfy.model_management.cast_to_device(scale, input.device, None)
-                        input = QuantizedTensor.from_float(input_reshaped, self.layout_type, scale=scale)
+                    if input.ndim != 2:
+                        # Fall back to comfy_cast_weights for non-2D tensors
+                        return self.forward_comfy_cast_weights(input.reshape(input_shape), *args, **kwargs)

-                output = self.forward_comfy_cast_weights(input)
+                    # dtype is now implicit in the layout class
+                    input = QuantizedTensor.from_float(input, self.layout_type, scale=getattr(self, 'input_scale', None))
+
+                output = self._forward(input, self.weight, self.bias)

                # Reshape output back to 3D if input was 3D
-                if reshaped_3d:
+                if tensor_3d:
                    output = output.reshape((input_shape[0], input_shape[1], self.weight.shape[0]))

                return output
@ -717,17 +711,10 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec

 def pick_operations(weight_dtype, compute_dtype, load_device=None, disable_fast_fp8=False, fp8_optimizations=False, model_config=None):
    fp8_compute = comfy.model_management.supports_fp8_compute(load_device) # TODO: if we support more ops this needs to be more granular
-    nvfp4_compute = comfy.model_management.supports_nvfp4_compute(load_device)

    if model_config and hasattr(model_config, 'quant_config') and model_config.quant_config:
        logging.info("Using mixed precision operations")
-        disabled = set()
-        if not nvfp4_compute:
-            disabled.add("nvfp4")
-        if not fp8_compute:
-            disabled.add("float8_e4m3fn")
-            disabled.add("float8_e5m2")
-        return mixed_precision_ops(model_config.quant_config, compute_dtype, disabled=disabled)
+        return mixed_precision_ops(model_config.quant_config, compute_dtype, full_precision_mm=not fp8_compute)

    if (
        fp8_compute and
--- a/comfy/quant_ops.py
+++ b/comfy/quant_ops.py
@ -13,14 +13,6 @@ try:
        get_layout_class,
    )
    _CK_AVAILABLE = True
-    if torch.version.cuda is None:
-        ck.registry.disable("cuda")
-    else:
-        cuda_version = tuple(map(int, str(torch.version.cuda).split('.')))
-        if cuda_version < (13,):
-            ck.registry.disable("cuda")
-            logging.warning("WARNING: You need pytorch with cu130 or higher to use optimized CUDA operations.")
-
    ck.registry.disable("triton")
    for k, v in ck.list_backends().items():
        logging.info(f"Found comfy_kitchen backend {k}: {v}")
--- a/comfy/sd.py
+++ b/comfy/sd.py
@ -218,7 +218,7 @@ class CLIP:
            if unprojected:
                self.cond_stage_model.set_clip_options({"projected_pooled": False})

-            self.load_model(tokens)
+            self.load_model()
            self.cond_stage_model.set_clip_options({"execution_device": self.patcher.load_device})
            all_hooks.reset()
            self.patcher.patch_hooks(None)
@ -266,7 +266,7 @@ class CLIP:
        if return_pooled == "unprojected":
            self.cond_stage_model.set_clip_options({"projected_pooled": False})

-        self.load_model(tokens)
+        self.load_model()
        self.cond_stage_model.set_clip_options({"execution_device": self.patcher.load_device})
        o = self.cond_stage_model.encode_token_weights(tokens)
        cond, pooled = o[:2]
@ -299,11 +299,8 @@ class CLIP:
            sd_clip[k] = sd_tokenizer[k]
        return sd_clip

-    def load_model(self, tokens={}):
-        memory_used = 0
-        if hasattr(self.cond_stage_model, "memory_estimation_function"):
-            memory_used = self.cond_stage_model.memory_estimation_function(tokens, device=self.patcher.load_device)
-        model_management.load_models_gpu([self.patcher], memory_required=memory_used)
+    def load_model(self):
+        model_management.load_model_gpu(self.patcher)
        return self.patcher

    def get_key_patches(self):
@ -479,8 +476,8 @@ class VAE:
                self.first_stage_model = comfy.ldm.lightricks.vae.causal_video_autoencoder.VideoVAE(version=version, config=vae_config)
                self.latent_channels = 128
                self.latent_dim = 3
-                self.memory_used_decode = lambda shape, dtype: (1200 * shape[2] * shape[3] * shape[4] * (8 * 8 * 8)) * model_management.dtype_size(dtype)
-                self.memory_used_encode = lambda shape, dtype: (80 * max(shape[2], 7) * shape[3] * shape[4]) * model_management.dtype_size(dtype)
+                self.memory_used_decode = lambda shape, dtype: (900 * shape[2] * shape[3] * shape[4] * (8 * 8 * 8)) * model_management.dtype_size(dtype)
+                self.memory_used_encode = lambda shape, dtype: (70 * max(shape[2], 7) * shape[3] * shape[4]) * model_management.dtype_size(dtype)
                self.upscale_ratio = (lambda a: max(0, a * 8 - 7), 32, 32)
                self.upscale_index_formula = (8, 32, 32)
                self.downscale_ratio = (lambda a: max(0, math.floor((a + 7) / 8)), 32, 32)
--- a/comfy/supported_models.py
+++ b/comfy/supported_models.py
@ -845,7 +845,7 @@ class LTXAV(LTXV):

    def __init__(self, unet_config):
        super().__init__(unet_config)
-        self.memory_usage_factor = 0.061  # TODO
+        self.memory_usage_factor = 0.055  # TODO

    def get_model(self, state_dict, prefix="", device=None):
        out = model_base.LTXAV(self, device=device)
--- a/comfy/text_encoders/lt.py
+++ b/comfy/text_encoders/lt.py
@ -36,10 +36,10 @@ class LTXAVGemmaTokenizer(sd1_clip.SD1Tokenizer):

 class Gemma3_12BModel(sd1_clip.SDClipModel):
    def __init__(self, device="cpu", layer="all", layer_idx=None, dtype=None, attention_mask=True, model_options={}):
-        llama_quantization_metadata = model_options.get("llama_quantization_metadata", None)
-        if llama_quantization_metadata is not None:
+        llama_scaled_fp8 = model_options.get("gemma_scaled_fp8", None)
+        if llama_scaled_fp8 is not None:
            model_options = model_options.copy()
-            model_options["quantization_metadata"] = llama_quantization_metadata
+            model_options["scaled_fp8"] = llama_scaled_fp8

        super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"start": 2, "pad": 0}, layer_norm_hidden_state=False, model_class=comfy.text_encoders.llama.Gemma3_12B, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options)

@ -86,25 +86,20 @@ class LTXAVTEModel(torch.nn.Module):
        )

    def set_clip_options(self, options):
-        self.execution_device = options.get("execution_device", self.execution_device)
        self.gemma3_12b.set_clip_options(options)

    def reset_clip_options(self):
        self.gemma3_12b.reset_clip_options()
-        self.execution_device = None

    def encode_token_weights(self, token_weight_pairs):
        token_weight_pairs = token_weight_pairs["gemma3_12b"]

        out, pooled, extra = self.gemma3_12b.encode_token_weights(token_weight_pairs)
        out_device = out.device
-        if comfy.model_management.should_use_bf16(self.execution_device):
-            out = out.to(device=self.execution_device, dtype=torch.bfloat16)
-        out = out.movedim(1, -1).to(self.execution_device)
+        out = out.movedim(1, -1).to(self.text_embedding_projection.weight.device)
        out = 8.0 * (out - out.mean(dim=(1, 2), keepdim=True)) / (out.amax(dim=(1, 2), keepdim=True) - out.amin(dim=(1, 2), keepdim=True) + 1e-6)
        out = out.reshape((out.shape[0], out.shape[1], -1))
        out = self.text_embedding_projection(out)
-        out = out.float()
        out_vid = self.video_embeddings_connector(out)[0]
        out_audio = self.audio_embeddings_connector(out)[0]
        out = torch.concat((out_vid, out_audio), dim=-1)
@ -121,21 +116,13 @@ class LTXAVTEModel(torch.nn.Module):

            return self.load_state_dict(sdo, strict=False)

-    def memory_estimation_function(self, token_weight_pairs, device=None):
-        constant = 6.0
-        if comfy.model_management.should_use_bf16(device):
-            constant /= 2.0

-        token_weight_pairs = token_weight_pairs.get("gemma3_12b", [])
-        num_tokens = sum(map(lambda a: len(a), token_weight_pairs))
-        return num_tokens * constant * 1024 * 1024
-
-def ltxav_te(dtype_llama=None, llama_quantization_metadata=None):
+def ltxav_te(dtype_llama=None, llama_scaled_fp8=None):
    class LTXAVTEModel_(LTXAVTEModel):
        def __init__(self, device="cpu", dtype=None, model_options={}):
-            if llama_quantization_metadata is not None:
+            if llama_scaled_fp8 is not None and "llama_scaled_fp8" not in model_options:
                model_options = model_options.copy()
-                model_options["llama_quantization_metadata"] = llama_quantization_metadata
+                model_options["llama_scaled_fp8"] = llama_scaled_fp8
            if dtype_llama is not None:
                dtype = dtype_llama
            super().__init__(dtype_llama=dtype_llama, device=device, dtype=dtype, model_options=model_options)
--- a/comfy_api_nodes/nodes_wan.py
+++ b/comfy_api_nodes/nodes_wan.py
@ -13,9 +13,7 @@ from comfy_api_nodes.util import (
    poll_op,
    sync_op,
    tensor_to_base64_string,
-    upload_video_to_comfyapi,
    validate_audio_duration,
-    validate_video_duration,
 )


@ -43,12 +41,6 @@ class Image2VideoInputField(BaseModel):
    audio_url: str | None = Field(None)


-class Reference2VideoInputField(BaseModel):
-    prompt: str = Field(...)
-    negative_prompt: str | None = Field(None)
-    reference_video_urls: list[str] = Field(...)
-
-
 class Txt2ImageParametersField(BaseModel):
    size: str = Field(...)
    n: int = Field(1, description="Number of images to generate.")  # we support only value=1
@ -84,14 +76,6 @@ class Image2VideoParametersField(BaseModel):
    shot_type: str = Field("single")


-class Reference2VideoParametersField(BaseModel):
-    size: str = Field(...)
-    duration: int = Field(5, ge=5, le=15)
-    shot_type: str = Field("single")
-    seed: int = Field(..., ge=0, le=2147483647)
-    watermark: bool = Field(False)
-
-
 class Text2ImageTaskCreationRequest(BaseModel):
    model: str = Field(...)
    input: Text2ImageInputField = Field(...)
@ -116,12 +100,6 @@ class Image2VideoTaskCreationRequest(BaseModel):
    parameters: Image2VideoParametersField = Field(...)


-class Reference2VideoTaskCreationRequest(BaseModel):
-    model: str = Field(...)
-    input: Reference2VideoInputField = Field(...)
-    parameters: Reference2VideoParametersField = Field(...)
-
-
 class TaskCreationOutputField(BaseModel):
    task_id: str = Field(...)
    task_status: str = Field(...)
@ -743,143 +721,6 @@ class WanImageToVideoApi(IO.ComfyNode):
        return IO.NodeOutput(await download_url_to_video_output(response.output.video_url))


-class WanReferenceVideoApi(IO.ComfyNode):
-    @classmethod
-    def define_schema(cls):
-        return IO.Schema(
-            node_id="WanReferenceVideoApi",
-            display_name="Wan Reference to Video",
-            category="api node/video/Wan",
-            description="Use the character and voice from input videos, combined with a prompt, "
-            "to generate a new video that maintains character consistency.",
-            inputs=[
-                IO.Combo.Input("model", options=["wan2.6-r2v"]),
-                IO.String.Input(
-                    "prompt",
-                    multiline=True,
-                    default="",
-                    tooltip="Prompt describing the elements and visual features. Supports English and Chinese. "
-                    "Use identifiers such as `character1` and `character2` to refer to the reference characters.",
-                ),
-                IO.String.Input(
-                    "negative_prompt",
-                    multiline=True,
-                    default="",
-                    tooltip="Negative prompt describing what to avoid.",
-                ),
-                IO.Autogrow.Input(
-                    "reference_videos",
-                    template=IO.Autogrow.TemplateNames(
-                        IO.Video.Input("reference_video"),
-                        names=["character1", "character2", "character3"],
-                        min=1,
-                    ),
-                ),
-                IO.Combo.Input(
-                    "size",
-                    options=[
-                        "720p: 1:1 (960x960)",
-                        "720p: 16:9 (1280x720)",
-                        "720p: 9:16 (720x1280)",
-                        "720p: 4:3 (1088x832)",
-                        "720p: 3:4 (832x1088)",
-                        "1080p: 1:1 (1440x1440)",
-                        "1080p: 16:9 (1920x1080)",
-                        "1080p: 9:16 (1080x1920)",
-                        "1080p: 4:3 (1632x1248)",
-                        "1080p: 3:4 (1248x1632)",
-                    ],
-                ),
-                IO.Int.Input(
-                    "duration",
-                    default=5,
-                    min=5,
-                    max=10,
-                    step=5,
-                    display_mode=IO.NumberDisplay.slider,
-                ),
-                IO.Int.Input(
-                    "seed",
-                    default=0,
-                    min=0,
-                    max=2147483647,
-                    step=1,
-                    display_mode=IO.NumberDisplay.number,
-                    control_after_generate=True,
-                ),
-                IO.Combo.Input(
-                    "shot_type",
-                    options=["single", "multi"],
-                    tooltip="Specifies the shot type for the generated video, that is, whether the video is a "
-                    "single continuous shot or multiple shots with cuts.",
-                ),
-                IO.Boolean.Input(
-                    "watermark",
-                    default=False,
-                    tooltip="Whether to add an AI-generated watermark to the result.",
-                ),
-            ],
-            outputs=[
-                IO.Video.Output(),
-            ],
-            hidden=[
-                IO.Hidden.auth_token_comfy_org,
-                IO.Hidden.api_key_comfy_org,
-                IO.Hidden.unique_id,
-            ],
-            is_api_node=True,
-        )
-
-    @classmethod
-    async def execute(
-        cls,
-        model: str,
-        prompt: str,
-        negative_prompt: str,
-        reference_videos: IO.Autogrow.Type,
-        size: str,
-        duration: int,
-        seed: int,
-        shot_type: str,
-        watermark: bool,
-    ):
-        reference_video_urls = []
-        for i in reference_videos:
-            validate_video_duration(reference_videos[i], min_duration=2, max_duration=30)
-        for i in reference_videos:
-            reference_video_urls.append(await upload_video_to_comfyapi(cls, reference_videos[i]))
-        width, height = RES_IN_PARENS.search(size).groups()
-        initial_response = await sync_op(
-            cls,
-            ApiEndpoint(path="/proxy/wan/api/v1/services/aigc/video-generation/video-synthesis", method="POST"),
-            response_model=TaskCreationResponse,
-            data=Reference2VideoTaskCreationRequest(
-                model=model,
-                input=Reference2VideoInputField(
-                    prompt=prompt, negative_prompt=negative_prompt, reference_video_urls=reference_video_urls
-                ),
-                parameters=Reference2VideoParametersField(
-                    size=f"{width}*{height}",
-                    duration=duration,
-                    shot_type=shot_type,
-                    watermark=watermark,
-                    seed=seed,
-                ),
-            ),
-        )
-        if not initial_response.output:
-            raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}")
-        response = await poll_op(
-            cls,
-            ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"),
-            response_model=VideoTaskStatusResponse,
-            status_extractor=lambda x: x.output.task_status,
-            poll_interval=6,
-            max_poll_attempts=280,
-        )
-        return IO.NodeOutput(await download_url_to_video_output(response.output.video_url))
-
-
 class WanApiExtension(ComfyExtension):
    @override
    async def get_node_list(self) -> list[type[IO.ComfyNode]]:
@ -888,7 +729,6 @@ class WanApiExtension(ComfyExtension):
            WanImageToImageApi,
            WanTextToVideoApi,
            WanImageToVideoApi,
-            WanReferenceVideoApi,
        ]


--- a/comfy_api_nodes/util/upload_helpers.py
+++ b/comfy_api_nodes/util/upload_helpers.py
@ -119,7 +119,7 @@ async def upload_video_to_comfyapi(
            raise ValueError(f"Could not verify video duration from source: {e}") from e

    upload_mime_type = f"video/{container.value.lower()}"
-    filename = f"{uuid.uuid4()}.{container.value.lower()}"
+    filename = f"uploaded_video.{container.value.lower()}"

    # Convert VideoInput to BytesIO using specified container/codec
    video_bytes_io = BytesIO()
--- a/comfy_extras/nodes_lt_audio.py
+++ b/comfy_extras/nodes_lt_audio.py
@ -185,10 +185,6 @@ class LTXAVTextEncoderLoader(io.ComfyNode):
                io.Combo.Input(
                    "ckpt_name",
                    options=folder_paths.get_filename_list("checkpoints"),
-                ),
-                io.Combo.Input(
-                    "device",
-                    options=["default", "cpu"],
                )
            ],
            outputs=[io.Clip.Output()],
@ -201,11 +197,7 @@ class LTXAVTextEncoderLoader(io.ComfyNode):
        clip_path1 = folder_paths.get_full_path_or_raise("text_encoders", text_encoder)
        clip_path2 = folder_paths.get_full_path_or_raise("checkpoints", ckpt_name)

-        model_options = {}
-        if device == "cpu":
-            model_options["load_device"] = model_options["offload_device"] = torch.device("cpu")
-
-        clip = comfy.sd.load_clip(ckpt_paths=[clip_path1, clip_path2], embedding_directory=folder_paths.get_folder_paths("embeddings"), clip_type=clip_type, model_options=model_options)
+        clip = comfy.sd.load_clip(ckpt_paths=[clip_path1, clip_path2], embedding_directory=folder_paths.get_folder_paths("embeddings"), clip_type=clip_type)
        return io.NodeOutput(clip)


--- a/comfyui_version.py
+++ b/comfyui_version.py
@ -1,3 +1,3 @@
 # This file is automatically generated by the build process when version is
 # updated in pyproject.toml.
-__version__ = "0.8.2"
+__version__ = "0.7.0"
--- a/manager_requirements.txt
+++ b/manager_requirements.txt
@ -1 +1 @@
-comfyui_manager==4.0.5
+comfyui_manager==4.0.4
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,6 +1,6 @@
 [project]
 name = "ComfyUI"
-version = "0.8.2"
+version = "0.7.0"
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.10"
--- a/requirements.txt
+++ b/requirements.txt
@ -1,5 +1,5 @@
 comfyui-frontend-package==1.35.9
-comfyui-workflow-templates==0.7.69
+comfyui-workflow-templates==0.7.66
 comfyui-embedded-docs==0.3.1
 torch
 torchsde
@ -21,7 +21,7 @@ psutil
 alembic
 SQLAlchemy
 av>=14.2.0
-comfy-kitchen>=0.2.5
+comfy-kitchen>=0.2.0

 #non essential dependencies:
 kornia>=0.7.1