diff --git a/.github/workflows/release-webhook.yml b/.github/workflows/release-webhook.yml
index 6fceb7560..737e4c488 100644
--- a/.github/workflows/release-webhook.yml
+++ b/.github/workflows/release-webhook.yml
@@ -7,6 +7,8 @@ on:
 jobs:
   send-webhook:
     runs-on: ubuntu-latest
+    env:
+      DESKTOP_REPO_DISPATCH_TOKEN: ${{ secrets.DESKTOP_REPO_DISPATCH_TOKEN }}
     steps:
       - name: Send release webhook
         env:
@@ -106,3 +108,37 @@ jobs:
             --fail --silent --show-error
           
           echo "✅ Release webhook sent successfully"
+
+      - name: Send repository dispatch to desktop
+        env:
+          DISPATCH_TOKEN: ${{ env.DESKTOP_REPO_DISPATCH_TOKEN }}
+          RELEASE_TAG: ${{ github.event.release.tag_name }}
+          RELEASE_URL: ${{ github.event.release.html_url }}
+        run: |
+          set -euo pipefail
+
+          if [ -z "${DISPATCH_TOKEN:-}" ]; then
+            echo "::error::DESKTOP_REPO_DISPATCH_TOKEN is required but not set."
+            exit 1
+          fi
+
+          PAYLOAD="$(jq -n \
+            --arg release_tag "$RELEASE_TAG" \
+            --arg release_url "$RELEASE_URL" \
+            '{
+              event_type: "comfyui_release_published",
+              client_payload: {
+                release_tag: $release_tag,
+                release_url: $release_url
+              }
+            }')"
+
+          curl -fsSL \
+            -X POST \
+            -H "Accept: application/vnd.github+json" \
+            -H "Content-Type: application/json" \
+            -H "Authorization: Bearer ${DISPATCH_TOKEN}" \
+            https://api.github.com/repos/Comfy-Org/desktop/dispatches \
+            -d "$PAYLOAD"
+
+          echo "✅ Dispatched ComfyUI release ${RELEASE_TAG} to Comfy-Org/desktop"
diff --git a/README.md b/README.md
index 96dc2904b..3ccdc9c19 100644
--- a/README.md
+++ b/README.md
@@ -227,7 +227,7 @@ Put your VAE in: models/vae
 
 AMD users can install rocm and pytorch with pip if you don't have it already installed, this is the command to install the stable version:
 
-```pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.4```
+```pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm7.1```
 
 This is the command to install the nightly with ROCm 7.1 which might have some performance improvements:
 
diff --git a/app/node_replace_manager.py b/app/node_replace_manager.py
new file mode 100644
index 000000000..03b603c70
--- /dev/null
+++ b/app/node_replace_manager.py
@@ -0,0 +1,105 @@
+from __future__ import annotations
+
+from aiohttp import web
+
+from typing import TYPE_CHECKING, TypedDict
+if TYPE_CHECKING:
+    from comfy_api.latest._io_public import NodeReplace
+
+from comfy_execution.graph_utils import is_link
+import nodes
+
+class NodeStruct(TypedDict):
+    inputs: dict[str, str | int | float | bool | tuple[str, int]]
+    class_type: str
+    _meta: dict[str, str]
+
+def copy_node_struct(node_struct: NodeStruct, empty_inputs: bool = False) -> NodeStruct:
+    new_node_struct = node_struct.copy()
+    if empty_inputs:
+        new_node_struct["inputs"] = {}
+    else:
+        new_node_struct["inputs"] = node_struct["inputs"].copy()
+    new_node_struct["_meta"] = node_struct["_meta"].copy()
+    return new_node_struct
+
+
+class NodeReplaceManager:
+    """Manages node replacement registrations."""
+
+    def __init__(self):
+        self._replacements: dict[str, list[NodeReplace]] = {}
+
+    def register(self, node_replace: NodeReplace):
+        """Register a node replacement mapping."""
+        self._replacements.setdefault(node_replace.old_node_id, []).append(node_replace)
+
+    def get_replacement(self, old_node_id: str) -> list[NodeReplace] | None:
+        """Get replacements for an old node ID."""
+        return self._replacements.get(old_node_id)
+
+    def has_replacement(self, old_node_id: str) -> bool:
+        """Check if a replacement exists for an old node ID."""
+        return old_node_id in self._replacements
+
+    def apply_replacements(self, prompt: dict[str, NodeStruct]):
+        connections: dict[str, list[tuple[str, str, int]]] = {}
+        need_replacement: set[str] = set()
+        for node_number, node_struct in prompt.items():
+            class_type = node_struct["class_type"]
+            # need replacement if not in NODE_CLASS_MAPPINGS and has replacement
+            if class_type not in nodes.NODE_CLASS_MAPPINGS.keys() and self.has_replacement(class_type):
+                need_replacement.add(node_number)
+            # keep track of connections
+            for input_id, input_value in node_struct["inputs"].items():
+                if is_link(input_value):
+                    conn_number = input_value[0]
+                    connections.setdefault(conn_number, []).append((node_number, input_id, input_value[1]))
+        for node_number in need_replacement:
+            node_struct = prompt[node_number]
+            class_type = node_struct["class_type"]
+            replacements = self.get_replacement(class_type)
+            if replacements is None:
+                continue
+            # just use the first replacement
+            replacement = replacements[0]
+            new_node_id = replacement.new_node_id
+            # if replacement is not a valid node, skip trying to replace it as will only cause confusion
+            if new_node_id not in nodes.NODE_CLASS_MAPPINGS.keys():
+                continue
+            # first, replace node id (class_type)
+            new_node_struct = copy_node_struct(node_struct, empty_inputs=True)
+            new_node_struct["class_type"] = new_node_id
+            # TODO: consider replacing display_name in _meta as well for error reporting purposes; would need to query node schema
+            # second, replace inputs
+            if replacement.input_mapping is not None:
+                for input_map in replacement.input_mapping:
+                    if "set_value" in input_map:
+                        new_node_struct["inputs"][input_map["new_id"]] = input_map["set_value"]
+                    elif "old_id" in input_map:
+                        new_node_struct["inputs"][input_map["new_id"]] = node_struct["inputs"][input_map["old_id"]]
+            # finalize input replacement
+            prompt[node_number] = new_node_struct
+            # third, replace outputs
+            if replacement.output_mapping is not None:
+                # re-mapping outputs requires changing the input values of nodes that receive connections from this one
+                if node_number in connections:
+                    for conns in connections[node_number]:
+                        conn_node_number, conn_input_id, old_output_idx = conns
+                        for output_map in replacement.output_mapping:
+                            if output_map["old_idx"] == old_output_idx:
+                                new_output_idx = output_map["new_idx"]
+                                previous_input = prompt[conn_node_number]["inputs"][conn_input_id]
+                                previous_input[1] = new_output_idx
+
+    def as_dict(self):
+        """Serialize all replacements to dict."""
+        return {
+            k: [v.as_dict() for v in v_list]
+            for k, v_list in self._replacements.items()
+        }
+
+    def add_routes(self, routes):
+        @routes.get("/node_replacements")
+        async def get_node_replacements(request):
+            return web.json_response(self.as_dict())
diff --git a/comfy/checkpoint_pickle.py b/comfy/checkpoint_pickle.py
deleted file mode 100644
index 206551d3c..000000000
--- a/comfy/checkpoint_pickle.py
+++ /dev/null
@@ -1,13 +0,0 @@
-import pickle
-
-load = pickle.load
-
-class Empty:
-    pass
-
-class Unpickler(pickle.Unpickler):
-    def find_class(self, module, name):
-        #TODO: safe unpickle
-        if module.startswith("pytorch_lightning"):
-            return Empty
-        return super().find_class(module, name)
diff --git a/comfy/controlnet.py b/comfy/controlnet.py
index 9e1e704e0..ba670b16d 100644
--- a/comfy/controlnet.py
+++ b/comfy/controlnet.py
@@ -297,6 +297,30 @@ class ControlNet(ControlBase):
         self.model_sampling_current = None
         super().cleanup()
 
+
+class QwenFunControlNet(ControlNet):
+    def get_control(self, x_noisy, t, cond, batched_number, transformer_options):
+        # Fun checkpoints are more sensitive to high strengths in the generic
+        # ControlNet merge path. Use a soft response curve so strength=1.0 stays
+        # unchanged while >1 grows more gently.
+        original_strength = self.strength
+        self.strength = math.sqrt(max(self.strength, 0.0))
+        try:
+            return super().get_control(x_noisy, t, cond, batched_number, transformer_options)
+        finally:
+            self.strength = original_strength
+
+    def pre_run(self, model, percent_to_timestep_function):
+        super().pre_run(model, percent_to_timestep_function)
+        self.set_extra_arg("base_model", model.diffusion_model)
+
+    def copy(self):
+        c = QwenFunControlNet(None, global_average_pooling=self.global_average_pooling, load_device=self.load_device, manual_cast_dtype=self.manual_cast_dtype)
+        c.control_model = self.control_model
+        c.control_model_wrapped = self.control_model_wrapped
+        self.copy_to(c)
+        return c
+
 class ControlLoraOps:
     class Linear(torch.nn.Module, comfy.ops.CastWeightBiasOp):
         def __init__(self, in_features: int, out_features: int, bias: bool = True,
@@ -560,6 +584,7 @@ def load_controlnet_hunyuandit(controlnet_data, model_options={}):
 def load_controlnet_flux_xlabs_mistoline(sd, mistoline=False, model_options={}):
     model_config, operations, load_device, unet_dtype, manual_cast_dtype, offload_device = controlnet_config(sd, model_options=model_options)
     control_model = comfy.ldm.flux.controlnet.ControlNetFlux(mistoline=mistoline, operations=operations, device=offload_device, dtype=unet_dtype, **model_config.unet_config)
+    sd = model_config.process_unet_state_dict(sd)
     control_model = controlnet_load_state_dict(control_model, sd)
     extra_conds = ['y', 'guidance']
     control = ControlNet(control_model, load_device=load_device, manual_cast_dtype=manual_cast_dtype, extra_conds=extra_conds)
@@ -605,6 +630,53 @@ def load_controlnet_qwen_instantx(sd, model_options={}):
     control = ControlNet(control_model, compression_ratio=1, latent_format=latent_format, concat_mask=concat_mask, load_device=load_device, manual_cast_dtype=manual_cast_dtype, extra_conds=extra_conds)
     return control
 
+
+def load_controlnet_qwen_fun(sd, model_options={}):
+    load_device = comfy.model_management.get_torch_device()
+    weight_dtype = comfy.utils.weight_dtype(sd)
+    unet_dtype = model_options.get("dtype", weight_dtype)
+    manual_cast_dtype = comfy.model_management.unet_manual_cast(unet_dtype, load_device)
+
+    operations = model_options.get("custom_operations", None)
+    if operations is None:
+        operations = comfy.ops.pick_operations(unet_dtype, manual_cast_dtype, disable_fast_fp8=True)
+
+    in_features = sd["control_img_in.weight"].shape[1]
+    inner_dim = sd["control_img_in.weight"].shape[0]
+
+    block_weight = sd["control_blocks.0.attn.to_q.weight"]
+    attention_head_dim = sd["control_blocks.0.attn.norm_q.weight"].shape[0]
+    num_attention_heads = max(1, block_weight.shape[0] // max(1, attention_head_dim))
+
+    model = comfy.ldm.qwen_image.controlnet.QwenImageFunControlNetModel(
+        control_in_features=in_features,
+        inner_dim=inner_dim,
+        num_attention_heads=num_attention_heads,
+        attention_head_dim=attention_head_dim,
+        num_control_blocks=5,
+        main_model_double=60,
+        injection_layers=(0, 12, 24, 36, 48),
+        operations=operations,
+        device=comfy.model_management.unet_offload_device(),
+        dtype=unet_dtype,
+    )
+    model = controlnet_load_state_dict(model, sd)
+
+    latent_format = comfy.latent_formats.Wan21()
+    control = QwenFunControlNet(
+        model,
+        compression_ratio=1,
+        latent_format=latent_format,
+        # Fun checkpoints already expect their own 33-channel context handling.
+        # Enabling generic concat_mask injects an extra mask channel at apply-time
+        # and breaks the intended fallback packing path.
+        concat_mask=False,
+        load_device=load_device,
+        manual_cast_dtype=manual_cast_dtype,
+        extra_conds=[],
+    )
+    return control
+
 def convert_mistoline(sd):
     return comfy.utils.state_dict_prefix_replace(sd, {"single_controlnet_blocks.": "controlnet_single_blocks."})
 
@@ -682,6 +754,8 @@ def load_controlnet_state_dict(state_dict, model=None, model_options={}):
             return load_controlnet_qwen_instantx(controlnet_data, model_options=model_options)
         elif "controlnet_x_embedder.weight" in controlnet_data:
             return load_controlnet_flux_instantx(controlnet_data, model_options=model_options)
+    elif "control_blocks.0.after_proj.weight" in controlnet_data and "control_img_in.weight" in controlnet_data:
+        return load_controlnet_qwen_fun(controlnet_data, model_options=model_options)
 
     elif "controlnet_blocks.0.linear.weight" in controlnet_data: #mistoline flux
         return load_controlnet_flux_xlabs_mistoline(convert_mistoline(controlnet_data), mistoline=True, model_options=model_options)
diff --git a/comfy/k_diffusion/sampling.py b/comfy/k_diffusion/sampling.py
index c0c51d51a..6978eb717 100644
--- a/comfy/k_diffusion/sampling.py
+++ b/comfy/k_diffusion/sampling.py
@@ -1,12 +1,11 @@
 import math
-import time
 from functools import partial
 
 from scipy import integrate
 import torch
 from torch import nn
 import torchsde
-from tqdm.auto import trange as trange_, tqdm
+from tqdm.auto import tqdm
 
 from . import utils
 from . import deis
@@ -15,34 +14,7 @@ import comfy.model_patcher
 import comfy.model_sampling
 
 import comfy.memory_management
-
-
-def trange(*args, **kwargs):
-    if comfy.memory_management.aimdo_allocator is None:
-        return trange_(*args, **kwargs)
-
-    pbar = trange_(*args, **kwargs, smoothing=1.0)
-    pbar._i = 0
-    pbar.set_postfix_str("  Model Initializing ...  ")
-
-    _update = pbar.update
-
-    def warmup_update(n=1):
-        pbar._i += 1
-        if pbar._i == 1:
-            pbar.i1_time = time.time()
-            pbar.set_postfix_str(" Model Initialization complete!  ")
-        elif pbar._i == 2:
-            #bring forward the effective start time based the the diff between first and second iteration
-            #to attempt to remove load overhead from the final step rate estimate.
-            pbar.start_t = pbar.i1_time - (time.time() - pbar.i1_time)
-            pbar.set_postfix_str("")
-
-        _update(n)
-
-    pbar.update = warmup_update
-    return pbar
-
+from comfy.utils import model_trange as trange
 
 def append_zero(x):
     return torch.cat([x, x.new_zeros([1])])
diff --git a/comfy/ldm/anima/model.py b/comfy/ldm/anima/model.py
index 2e6ed58fa..6fb51c4a4 100644
--- a/comfy/ldm/anima/model.py
+++ b/comfy/ldm/anima/model.py
@@ -195,8 +195,20 @@ class Anima(MiniTrainDIT):
         super().__init__(*args, **kwargs)
         self.llm_adapter = LLMAdapter(device=kwargs.get("device"), dtype=kwargs.get("dtype"), operations=kwargs.get("operations"))
 
-    def preprocess_text_embeds(self, text_embeds, text_ids):
+    def preprocess_text_embeds(self, text_embeds, text_ids, t5xxl_weights=None):
         if text_ids is not None:
-            return self.llm_adapter(text_embeds, text_ids)
+            out = self.llm_adapter(text_embeds, text_ids)
+            if t5xxl_weights is not None:
+                out = out * t5xxl_weights
+
+            if out.shape[1] < 512:
+                out = torch.nn.functional.pad(out, (0, 0, 0, 512 - out.shape[1]))
+            return out
         else:
             return text_embeds
+
+    def forward(self, x, timesteps, context, **kwargs):
+        t5xxl_ids = kwargs.pop("t5xxl_ids", None)
+        if t5xxl_ids is not None:
+            context = self.preprocess_text_embeds(context, t5xxl_ids, t5xxl_weights=kwargs.pop("t5xxl_weights", None))
+        return super().forward(x, timesteps, context, **kwargs)
diff --git a/comfy/ldm/chroma/layers.py b/comfy/ldm/chroma/layers.py
index 2d5684348..df348a8ed 100644
--- a/comfy/ldm/chroma/layers.py
+++ b/comfy/ldm/chroma/layers.py
@@ -3,7 +3,6 @@ from torch import Tensor, nn
 
 from comfy.ldm.flux.layers import (
     MLPEmbedder,
-    RMSNorm,
     ModulationOut,
 )
 
@@ -29,7 +28,7 @@ class Approximator(nn.Module):
         super().__init__()
         self.in_proj = operations.Linear(in_dim, hidden_dim, bias=True, dtype=dtype, device=device)
         self.layers = nn.ModuleList([MLPEmbedder(hidden_dim, hidden_dim, dtype=dtype, device=device, operations=operations) for x in range( n_layers)])
-        self.norms = nn.ModuleList([RMSNorm(hidden_dim, dtype=dtype, device=device, operations=operations) for x in range( n_layers)])
+        self.norms = nn.ModuleList([operations.RMSNorm(hidden_dim, dtype=dtype, device=device) for x in range( n_layers)])
         self.out_proj = operations.Linear(hidden_dim, out_dim, dtype=dtype, device=device)
 
     @property
diff --git a/comfy/ldm/chroma_radiance/layers.py b/comfy/ldm/chroma_radiance/layers.py
index 3c7bc9b6b..08d31e0ba 100644
--- a/comfy/ldm/chroma_radiance/layers.py
+++ b/comfy/ldm/chroma_radiance/layers.py
@@ -4,8 +4,6 @@ from functools import lru_cache
 import torch
 from torch import nn
 
-from comfy.ldm.flux.layers import RMSNorm
-
 
 class NerfEmbedder(nn.Module):
     """
@@ -145,7 +143,7 @@ class NerfGLUBlock(nn.Module):
         # We now need to generate parameters for 3 matrices.
         total_params = 3 * hidden_size_x**2 * mlp_ratio
         self.param_generator = operations.Linear(hidden_size_s, total_params, dtype=dtype, device=device)
-        self.norm = RMSNorm(hidden_size_x, dtype=dtype, device=device, operations=operations)
+        self.norm = operations.RMSNorm(hidden_size_x, dtype=dtype, device=device)
         self.mlp_ratio = mlp_ratio
 
 
@@ -178,7 +176,7 @@ class NerfGLUBlock(nn.Module):
 class NerfFinalLayer(nn.Module):
     def __init__(self, hidden_size, out_channels, dtype=None, device=None, operations=None):
         super().__init__()
-        self.norm = RMSNorm(hidden_size, dtype=dtype, device=device, operations=operations)
+        self.norm = operations.RMSNorm(hidden_size, dtype=dtype, device=device)
         self.linear = operations.Linear(hidden_size, out_channels, dtype=dtype, device=device)
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
@@ -190,7 +188,7 @@ class NerfFinalLayer(nn.Module):
 class NerfFinalLayerConv(nn.Module):
     def __init__(self, hidden_size: int, out_channels: int, dtype=None, device=None, operations=None):
         super().__init__()
-        self.norm = RMSNorm(hidden_size, dtype=dtype, device=device, operations=operations)
+        self.norm = operations.RMSNorm(hidden_size, dtype=dtype, device=device)
         self.conv = operations.Conv2d(
             in_channels=hidden_size,
             out_channels=out_channels,
diff --git a/comfy/ldm/flux/layers.py b/comfy/ldm/flux/layers.py
index 60f2bdae2..3518a1922 100644
--- a/comfy/ldm/flux/layers.py
+++ b/comfy/ldm/flux/layers.py
@@ -5,9 +5,9 @@ import torch
 from torch import Tensor, nn
 
 from .math import attention, rope
-import comfy.ops
-import comfy.ldm.common_dit
 
+# Fix import for some custom nodes, TODO: delete eventually.
+RMSNorm = None
 
 class EmbedND(nn.Module):
     def __init__(self, dim: int, theta: int, axes_dim: list):
@@ -87,20 +87,12 @@ def build_mlp(hidden_size, mlp_hidden_dim, mlp_silu_act=False, yak_mlp=False, dt
             operations.Linear(mlp_hidden_dim, hidden_size, bias=True, dtype=dtype, device=device),
         )
 
-class RMSNorm(torch.nn.Module):
-    def __init__(self, dim: int, dtype=None, device=None, operations=None):
-        super().__init__()
-        self.scale = nn.Parameter(torch.empty((dim), dtype=dtype, device=device))
-
-    def forward(self, x: Tensor):
-        return comfy.ldm.common_dit.rms_norm(x, self.scale, 1e-6)
-
 
 class QKNorm(torch.nn.Module):
     def __init__(self, dim: int, dtype=None, device=None, operations=None):
         super().__init__()
-        self.query_norm = RMSNorm(dim, dtype=dtype, device=device, operations=operations)
-        self.key_norm = RMSNorm(dim, dtype=dtype, device=device, operations=operations)
+        self.query_norm = operations.RMSNorm(dim, dtype=dtype, device=device)
+        self.key_norm = operations.RMSNorm(dim, dtype=dtype, device=device)
 
     def forward(self, q: Tensor, k: Tensor, v: Tensor) -> tuple:
         q = self.query_norm(q)
@@ -169,7 +161,7 @@ class SiLUActivation(nn.Module):
 
 
 class DoubleStreamBlock(nn.Module):
-    def __init__(self, hidden_size: int, num_heads: int, mlp_ratio: float, qkv_bias: bool = False, flipped_img_txt=False, modulation=True, mlp_silu_act=False, proj_bias=True, yak_mlp=False, dtype=None, device=None, operations=None):
+    def __init__(self, hidden_size: int, num_heads: int, mlp_ratio: float, qkv_bias: bool = False, modulation=True, mlp_silu_act=False, proj_bias=True, yak_mlp=False, dtype=None, device=None, operations=None):
         super().__init__()
 
         mlp_hidden_dim = int(hidden_size * mlp_ratio)
@@ -197,8 +189,6 @@ class DoubleStreamBlock(nn.Module):
 
         self.txt_mlp = build_mlp(hidden_size, mlp_hidden_dim, mlp_silu_act=mlp_silu_act, yak_mlp=yak_mlp, dtype=dtype, device=device, operations=operations)
 
-        self.flipped_img_txt = flipped_img_txt
-
     def forward(self, img: Tensor, txt: Tensor, vec: Tensor, pe: Tensor, attn_mask=None, modulation_dims_img=None, modulation_dims_txt=None, transformer_options={}):
         if self.modulation:
             img_mod1, img_mod2 = self.img_mod(vec)
@@ -224,32 +214,17 @@ class DoubleStreamBlock(nn.Module):
         del txt_qkv
         txt_q, txt_k = self.txt_attn.norm(txt_q, txt_k, txt_v)
 
-        if self.flipped_img_txt:
-            q = torch.cat((img_q, txt_q), dim=2)
-            del img_q, txt_q
-            k = torch.cat((img_k, txt_k), dim=2)
-            del img_k, txt_k
-            v = torch.cat((img_v, txt_v), dim=2)
-            del img_v, txt_v
-            # run actual attention
-            attn = attention(q, k, v,
-                             pe=pe, mask=attn_mask, transformer_options=transformer_options)
-            del q, k, v
+        q = torch.cat((txt_q, img_q), dim=2)
+        del txt_q, img_q
+        k = torch.cat((txt_k, img_k), dim=2)
+        del txt_k, img_k
+        v = torch.cat((txt_v, img_v), dim=2)
+        del txt_v, img_v
+        # run actual attention
+        attn = attention(q, k, v, pe=pe, mask=attn_mask, transformer_options=transformer_options)
+        del q, k, v
 
-            img_attn, txt_attn = attn[:, : img.shape[1]], attn[:, img.shape[1]:]
-        else:
-            q = torch.cat((txt_q, img_q), dim=2)
-            del txt_q, img_q
-            k = torch.cat((txt_k, img_k), dim=2)
-            del txt_k, img_k
-            v = torch.cat((txt_v, img_v), dim=2)
-            del txt_v, img_v
-            # run actual attention
-            attn = attention(q, k, v,
-                             pe=pe, mask=attn_mask, transformer_options=transformer_options)
-            del q, k, v
-
-            txt_attn, img_attn = attn[:, : txt.shape[1]], attn[:, txt.shape[1]:]
+        txt_attn, img_attn = attn[:, : txt.shape[1]], attn[:, txt.shape[1]:]
 
         # calculate the img bloks
         img += apply_mod(self.img_attn.proj(img_attn), img_mod1.gate, None, modulation_dims_img)
diff --git a/comfy/ldm/flux/math.py b/comfy/ldm/flux/math.py
index f9597de5b..5e764bb46 100644
--- a/comfy/ldm/flux/math.py
+++ b/comfy/ldm/flux/math.py
@@ -29,19 +29,34 @@ def rope(pos: Tensor, dim: int, theta: int) -> Tensor:
     return out.to(dtype=torch.float32, device=pos.device)
 
 
+def _apply_rope1(x: Tensor, freqs_cis: Tensor):
+    x_ = x.to(dtype=freqs_cis.dtype).reshape(*x.shape[:-1], -1, 1, 2)
+
+    x_out = freqs_cis[..., 0] * x_[..., 0]
+    x_out.addcmul_(freqs_cis[..., 1], x_[..., 1])
+
+    return x_out.reshape(*x.shape).type_as(x)
+
+
+def _apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor):
+    return apply_rope1(xq, freqs_cis), apply_rope1(xk, freqs_cis)
+
+
 try:
     import comfy.quant_ops
-    apply_rope = comfy.quant_ops.ck.apply_rope
-    apply_rope1 = comfy.quant_ops.ck.apply_rope1
+    q_apply_rope = comfy.quant_ops.ck.apply_rope
+    q_apply_rope1 = comfy.quant_ops.ck.apply_rope1
+    def apply_rope(xq, xk, freqs_cis):
+        if comfy.model_management.in_training:
+            return _apply_rope(xq, xk, freqs_cis)
+        else:
+            return apply_rope1(xq, freqs_cis), apply_rope1(xk, freqs_cis)
+    def apply_rope1(x, freqs_cis):
+        if comfy.model_management.in_training:
+            return _apply_rope1(x, freqs_cis)
+        else:
+            return q_apply_rope1(x, freqs_cis)
 except:
     logging.warning("No comfy kitchen, using old apply_rope functions.")
-    def apply_rope1(x: Tensor, freqs_cis: Tensor):
-        x_ = x.to(dtype=freqs_cis.dtype).reshape(*x.shape[:-1], -1, 1, 2)
-
-        x_out = freqs_cis[..., 0] * x_[..., 0]
-        x_out.addcmul_(freqs_cis[..., 1], x_[..., 1])
-
-        return x_out.reshape(*x.shape).type_as(x)
-
-    def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor):
-        return apply_rope1(xq, freqs_cis), apply_rope1(xk, freqs_cis)
+    apply_rope = _apply_rope
+    apply_rope1 = _apply_rope1
diff --git a/comfy/ldm/flux/model.py b/comfy/ldm/flux/model.py
index f40c2a7a9..260ccad7e 100644
--- a/comfy/ldm/flux/model.py
+++ b/comfy/ldm/flux/model.py
@@ -16,7 +16,6 @@ from .layers import (
     SingleStreamBlock,
     timestep_embedding,
     Modulation,
-    RMSNorm
 )
 
 @dataclass
@@ -81,7 +80,7 @@ class Flux(nn.Module):
         self.txt_in = operations.Linear(params.context_in_dim, self.hidden_size, bias=params.ops_bias, dtype=dtype, device=device)
 
         if params.txt_norm:
-            self.txt_norm = RMSNorm(params.context_in_dim, dtype=dtype, device=device, operations=operations)
+            self.txt_norm = operations.RMSNorm(params.context_in_dim, dtype=dtype, device=device)
         else:
             self.txt_norm = None
 
diff --git a/comfy/ldm/hunyuan_video/model.py b/comfy/ldm/hunyuan_video/model.py
index 55ab550f8..563f28f6b 100644
--- a/comfy/ldm/hunyuan_video/model.py
+++ b/comfy/ldm/hunyuan_video/model.py
@@ -241,7 +241,6 @@ class HunyuanVideo(nn.Module):
                     self.num_heads,
                     mlp_ratio=params.mlp_ratio,
                     qkv_bias=params.qkv_bias,
-                    flipped_img_txt=True,
                     dtype=dtype, device=device, operations=operations
                 )
                 for _ in range(params.depth)
@@ -378,14 +377,14 @@ class HunyuanVideo(nn.Module):
             extra_txt_ids = torch.zeros((txt_ids.shape[0], txt_vision_states.shape[1], txt_ids.shape[-1]), device=txt_ids.device, dtype=txt_ids.dtype)
             txt_ids = torch.cat((txt_ids, extra_txt_ids), dim=1)
 
-        ids = torch.cat((img_ids, txt_ids), dim=1)
+        ids = torch.cat((txt_ids, img_ids), dim=1)
         pe = self.pe_embedder(ids)
 
         img_len = img.shape[1]
         if txt_mask is not None:
             attn_mask_len = img_len + txt.shape[1]
             attn_mask = torch.zeros((1, 1, attn_mask_len), dtype=img.dtype, device=img.device)
-            attn_mask[:, 0, img_len:] = txt_mask
+            attn_mask[:, 0, :txt.shape[1]] = txt_mask
         else:
             attn_mask = None
 
@@ -413,7 +412,7 @@ class HunyuanVideo(nn.Module):
                     if add is not None:
                         img += add
 
-        img = torch.cat((img, txt), 1)
+        img = torch.cat((txt, img), 1)
 
         transformer_options["total_blocks"] = len(self.single_blocks)
         transformer_options["block_type"] = "single"
@@ -435,9 +434,9 @@ class HunyuanVideo(nn.Module):
                 if i < len(control_o):
                     add = control_o[i]
                     if add is not None:
-                        img[:, : img_len] += add
+                        img[:, txt.shape[1]: img_len + txt.shape[1]] += add
 
-        img = img[:, : img_len]
+        img = img[:, txt.shape[1]: img_len + txt.shape[1]]
         if ref_latent is not None:
             img = img[:, ref_latent.shape[1]:]
 
diff --git a/comfy/ldm/modules/diffusionmodules/model.py b/comfy/ldm/modules/diffusionmodules/model.py
index 5a22ef030..805592aa5 100644
--- a/comfy/ldm/modules/diffusionmodules/model.py
+++ b/comfy/ldm/modules/diffusionmodules/model.py
@@ -102,19 +102,7 @@ class VideoConv3d(nn.Module):
         return self.conv(x)
 
 def interpolate_up(x, scale_factor):
-    try:
-        return torch.nn.functional.interpolate(x, scale_factor=scale_factor, mode="nearest")
-    except: #operation not implemented for bf16
-        orig_shape = list(x.shape)
-        out_shape = orig_shape[:2]
-        for i in range(len(orig_shape) - 2):
-            out_shape.append(round(orig_shape[i + 2] * scale_factor[i]))
-        out = torch.empty(out_shape, dtype=x.dtype, layout=x.layout, device=x.device)
-        split = 8
-        l = out.shape[1] // split
-        for i in range(0, out.shape[1], l):
-            out[:,i:i+l] = torch.nn.functional.interpolate(x[:,i:i+l].to(torch.float32), scale_factor=scale_factor, mode="nearest").to(x.dtype)
-        return out
+    return torch.nn.functional.interpolate(x, scale_factor=scale_factor, mode="nearest")
 
 class Upsample(nn.Module):
     def __init__(self, in_channels, with_conv, conv_op=ops.Conv2d, scale_factor=2.0):
diff --git a/comfy/ldm/qwen_image/controlnet.py b/comfy/ldm/qwen_image/controlnet.py
index a6d408104..c0aae9240 100644
--- a/comfy/ldm/qwen_image/controlnet.py
+++ b/comfy/ldm/qwen_image/controlnet.py
@@ -2,6 +2,196 @@ import torch
 import math
 
 from .model import QwenImageTransformer2DModel
+from .model import QwenImageTransformerBlock
+
+
+class QwenImageFunControlBlock(QwenImageTransformerBlock):
+    def __init__(self, dim, num_attention_heads, attention_head_dim, has_before_proj=False, dtype=None, device=None, operations=None):
+        super().__init__(
+            dim=dim,
+            num_attention_heads=num_attention_heads,
+            attention_head_dim=attention_head_dim,
+            dtype=dtype,
+            device=device,
+            operations=operations,
+        )
+        self.has_before_proj = has_before_proj
+        if has_before_proj:
+            self.before_proj = operations.Linear(dim, dim, device=device, dtype=dtype)
+        self.after_proj = operations.Linear(dim, dim, device=device, dtype=dtype)
+
+
+class QwenImageFunControlNetModel(torch.nn.Module):
+    def __init__(
+        self,
+        control_in_features=132,
+        inner_dim=3072,
+        num_attention_heads=24,
+        attention_head_dim=128,
+        num_control_blocks=5,
+        main_model_double=60,
+        injection_layers=(0, 12, 24, 36, 48),
+        dtype=None,
+        device=None,
+        operations=None,
+    ):
+        super().__init__()
+        self.dtype = dtype
+        self.main_model_double = main_model_double
+        self.injection_layers = tuple(injection_layers)
+        # Keep base hint scaling at 1.0 so user-facing strength behaves similarly
+        # to the reference Gen2/VideoX implementation around strength=1.
+        self.hint_scale = 1.0
+        self.control_img_in = operations.Linear(control_in_features, inner_dim, device=device, dtype=dtype)
+
+        self.control_blocks = torch.nn.ModuleList([])
+        for i in range(num_control_blocks):
+            self.control_blocks.append(
+                QwenImageFunControlBlock(
+                    dim=inner_dim,
+                    num_attention_heads=num_attention_heads,
+                    attention_head_dim=attention_head_dim,
+                    has_before_proj=(i == 0),
+                    dtype=dtype,
+                    device=device,
+                    operations=operations,
+                )
+            )
+
+    def _process_hint_tokens(self, hint):
+        if hint is None:
+            return None
+        if hint.ndim == 4:
+            hint = hint.unsqueeze(2)
+
+        # Fun checkpoints are trained with 33 latent channels before 2x2 packing:
+        # [control_latent(16), mask(1), inpaint_latent(16)] -> 132 features.
+        # Default behavior (no inpaint input in stock Apply ControlNet) should use
+        # zeros for mask/inpaint branches, matching VideoX fallback semantics.
+        expected_c = self.control_img_in.weight.shape[1] // 4
+        if hint.shape[1] == 16 and expected_c == 33:
+            zeros_mask = torch.zeros_like(hint[:, :1])
+            zeros_inpaint = torch.zeros_like(hint)
+            hint = torch.cat([hint, zeros_mask, zeros_inpaint], dim=1)
+
+        bs, c, t, h, w = hint.shape
+        hidden_states = torch.nn.functional.pad(hint, (0, w % 2, 0, h % 2))
+        orig_shape = hidden_states.shape
+        hidden_states = hidden_states.view(
+            orig_shape[0],
+            orig_shape[1],
+            orig_shape[-3],
+            orig_shape[-2] // 2,
+            2,
+            orig_shape[-1] // 2,
+            2,
+        )
+        hidden_states = hidden_states.permute(0, 2, 3, 5, 1, 4, 6)
+        hidden_states = hidden_states.reshape(
+            bs,
+            t * ((h + 1) // 2) * ((w + 1) // 2),
+            c * 4,
+        )
+
+        expected_in = self.control_img_in.weight.shape[1]
+        cur_in = hidden_states.shape[-1]
+        if cur_in < expected_in:
+            pad = torch.zeros(
+                (hidden_states.shape[0], hidden_states.shape[1], expected_in - cur_in),
+                device=hidden_states.device,
+                dtype=hidden_states.dtype,
+            )
+            hidden_states = torch.cat([hidden_states, pad], dim=-1)
+        elif cur_in > expected_in:
+            hidden_states = hidden_states[:, :, :expected_in]
+
+        return hidden_states
+
+    def forward(
+        self,
+        x,
+        timesteps,
+        context,
+        attention_mask=None,
+        guidance: torch.Tensor = None,
+        hint=None,
+        transformer_options={},
+        base_model=None,
+        **kwargs,
+    ):
+        if base_model is None:
+            raise RuntimeError("Qwen Fun ControlNet requires a QwenImage base model at runtime.")
+
+        encoder_hidden_states_mask = attention_mask
+        # Keep attention mask disabled inside Fun control blocks to mirror
+        # VideoX behavior (they rely on seq lengths for RoPE, not masked attention).
+        encoder_hidden_states_mask = None
+
+        hidden_states, img_ids, _ = base_model.process_img(x)
+        hint_tokens = self._process_hint_tokens(hint)
+        if hint_tokens is None:
+            raise RuntimeError("Qwen Fun ControlNet requires a control hint image.")
+
+        if hint_tokens.shape[1] != hidden_states.shape[1]:
+            max_tokens = min(hint_tokens.shape[1], hidden_states.shape[1])
+            hint_tokens = hint_tokens[:, :max_tokens]
+            hidden_states = hidden_states[:, :max_tokens]
+            img_ids = img_ids[:, :max_tokens]
+
+        txt_start = round(
+            max(
+                ((x.shape[-1] + (base_model.patch_size // 2)) // base_model.patch_size) // 2,
+                ((x.shape[-2] + (base_model.patch_size // 2)) // base_model.patch_size) // 2,
+            )
+        )
+        txt_ids = torch.arange(txt_start, txt_start + context.shape[1], device=x.device).reshape(1, -1, 1).repeat(x.shape[0], 1, 3)
+        ids = torch.cat((txt_ids, img_ids), dim=1)
+        image_rotary_emb = base_model.pe_embedder(ids).to(x.dtype).contiguous()
+
+        hidden_states = base_model.img_in(hidden_states)
+        encoder_hidden_states = base_model.txt_norm(context)
+        encoder_hidden_states = base_model.txt_in(encoder_hidden_states)
+
+        if guidance is not None:
+            guidance = guidance * 1000
+
+        temb = (
+            base_model.time_text_embed(timesteps, hidden_states)
+            if guidance is None
+            else base_model.time_text_embed(timesteps, guidance, hidden_states)
+        )
+
+        c = self.control_img_in(hint_tokens)
+
+        for i, block in enumerate(self.control_blocks):
+            if i == 0:
+                c_in = block.before_proj(c) + hidden_states
+                all_c = []
+            else:
+                all_c = list(torch.unbind(c, dim=0))
+                c_in = all_c.pop(-1)
+
+            encoder_hidden_states, c_out = block(
+                hidden_states=c_in,
+                encoder_hidden_states=encoder_hidden_states,
+                encoder_hidden_states_mask=encoder_hidden_states_mask,
+                temb=temb,
+                image_rotary_emb=image_rotary_emb,
+                transformer_options=transformer_options,
+            )
+
+            c_skip = block.after_proj(c_out) * self.hint_scale
+            all_c += [c_skip, c_out]
+            c = torch.stack(all_c, dim=0)
+
+        hints = torch.unbind(c, dim=0)[:-1]
+
+        controlnet_block_samples = [None] * self.main_model_double
+        for local_idx, base_idx in enumerate(self.injection_layers):
+            if local_idx < len(hints) and base_idx < len(controlnet_block_samples):
+                controlnet_block_samples[base_idx] = hints[local_idx]
+
+        return {"input": controlnet_block_samples}
 
 
 class QwenImageControlNetModel(QwenImageTransformer2DModel):
diff --git a/comfy/lora.py b/comfy/lora.py
index 44030bcab..279cf38bb 100644
--- a/comfy/lora.py
+++ b/comfy/lora.py
@@ -374,6 +374,31 @@ def pad_tensor_to_shape(tensor: torch.Tensor, new_shape: list[int]) -> torch.Ten
 
     return padded_tensor
 
+def calculate_shape(patches, weight, key, original_weights=None):
+    current_shape = weight.shape
+
+    for p in patches:
+        v = p[1]
+        offset = p[3]
+
+        # Offsets restore the old shape; lists force a diff without metadata
+        if offset is not None or isinstance(v, list):
+            continue
+
+        if isinstance(v, weight_adapter.WeightAdapterBase):
+            adapter_shape = v.calculate_shape(key)
+            if adapter_shape is not None:
+                current_shape = adapter_shape
+            continue
+
+        # Standard diff logic with padding
+        if len(v) == 2:
+            patch_type, patch_data = v[0], v[1]
+            if patch_type == "diff" and len(patch_data) > 1 and patch_data[1]['pad_weight']:
+                current_shape = patch_data[0].shape
+
+    return current_shape
+
 def calculate_weight(patches, weight, key, intermediate_dtype=torch.float32, original_weights=None):
     for p in patches:
         strength = p[0]
diff --git a/comfy/lora_convert.py b/comfy/lora_convert.py
index 9d8d21efe..749e81df3 100644
--- a/comfy/lora_convert.py
+++ b/comfy/lora_convert.py
@@ -5,7 +5,7 @@ import comfy.utils
 def convert_lora_bfl_control(sd): #BFL loras for Flux
     sd_out = {}
     for k in sd:
-        k_to = "diffusion_model.{}".format(k.replace(".lora_B.bias", ".diff_b").replace("_norm.scale", "_norm.scale.set_weight"))
+        k_to = "diffusion_model.{}".format(k.replace(".lora_B.bias", ".diff_b").replace("_norm.scale", "_norm.set_weight"))
         sd_out[k_to] = sd[k]
 
     sd_out["diffusion_model.img_in.reshape_weight"] = torch.tensor([sd["img_in.lora_B.weight"].shape[0], sd["img_in.lora_A.weight"].shape[1]])
diff --git a/comfy/model_base.py b/comfy/model_base.py
index 858789b30..4a74cb1ce 100644
--- a/comfy/model_base.py
+++ b/comfy/model_base.py
@@ -1160,12 +1160,16 @@ class Anima(BaseModel):
         device = kwargs["device"]
         if cross_attn is not None:
             if t5xxl_ids is not None:
-                cross_attn = self.diffusion_model.preprocess_text_embeds(cross_attn.to(device=device, dtype=self.get_dtype()), t5xxl_ids.unsqueeze(0).to(device=device))
                 if t5xxl_weights is not None:
-                    cross_attn *= t5xxl_weights.unsqueeze(0).unsqueeze(-1).to(cross_attn)
+                    t5xxl_weights = t5xxl_weights.unsqueeze(0).unsqueeze(-1).to(cross_attn)
+                t5xxl_ids = t5xxl_ids.unsqueeze(0)
+
+                if torch.is_inference_mode_enabled():  # if not we are training
+                    cross_attn = self.diffusion_model.preprocess_text_embeds(cross_attn.to(device=device, dtype=self.get_dtype()), t5xxl_ids.to(device=device), t5xxl_weights=t5xxl_weights.to(device=device, dtype=self.get_dtype()))
+                else:
+                    out['t5xxl_ids'] = comfy.conds.CONDRegular(t5xxl_ids)
+                    out['t5xxl_weights'] = comfy.conds.CONDRegular(t5xxl_weights)
 
-                if cross_attn.shape[1] < 512:
-                    cross_attn = torch.nn.functional.pad(cross_attn, (0, 0, 0, 512 - cross_attn.shape[1]))
             out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)
         return out
 
diff --git a/comfy/model_detection.py b/comfy/model_detection.py
index e8ad725df..30ea03e8e 100644
--- a/comfy/model_detection.py
+++ b/comfy/model_detection.py
@@ -19,6 +19,12 @@ def count_blocks(state_dict_keys, prefix_string):
         count += 1
     return count
 
+def any_suffix_in(keys, prefix, main, suffix_list=[]):
+    for x in suffix_list:
+        if "{}{}{}".format(prefix, main, x) in keys:
+            return True
+    return False
+
 def calculate_transformer_depth(prefix, state_dict_keys, state_dict):
     context_dim = None
     use_linear_in_transformer = False
@@ -186,7 +192,7 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
             dit_config["meanflow_sum"] = False
         return dit_config
 
-    if '{}double_blocks.0.img_attn.norm.key_norm.scale'.format(key_prefix) in state_dict_keys and ('{}img_in.weight'.format(key_prefix) in state_dict_keys or f"{key_prefix}distilled_guidance_layer.norms.0.scale" in state_dict_keys): #Flux, Chroma or Chroma Radiance (has no img_in.weight)
+    if any_suffix_in(state_dict_keys, key_prefix, 'double_blocks.0.img_attn.norm.key_norm.', ["weight", "scale"]) and ('{}img_in.weight'.format(key_prefix) in state_dict_keys or any_suffix_in(state_dict_keys, key_prefix, 'distilled_guidance_layer.norms.0.', ["weight", "scale"])): #Flux, Chroma or Chroma Radiance (has no img_in.weight)
         dit_config = {}
         if '{}double_stream_modulation_img.lin.weight'.format(key_prefix) in state_dict_keys:
             dit_config["image_model"] = "flux2"
@@ -241,7 +247,8 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
 
         dit_config["depth"] = count_blocks(state_dict_keys, '{}double_blocks.'.format(key_prefix) + '{}.')
         dit_config["depth_single_blocks"] = count_blocks(state_dict_keys, '{}single_blocks.'.format(key_prefix) + '{}.')
-        if '{}distilled_guidance_layer.0.norms.0.scale'.format(key_prefix) in state_dict_keys or '{}distilled_guidance_layer.norms.0.scale'.format(key_prefix) in state_dict_keys: #Chroma
+
+        if any_suffix_in(state_dict_keys, key_prefix, 'distilled_guidance_layer.0.norms.0.', ["weight", "scale"]) or any_suffix_in(state_dict_keys, key_prefix, 'distilled_guidance_layer.norms.0.', ["weight", "scale"]): #Chroma
             dit_config["image_model"] = "chroma"
             dit_config["in_channels"] = 64
             dit_config["out_channels"] = 64
@@ -249,7 +256,8 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
             dit_config["out_dim"] = 3072
             dit_config["hidden_dim"] = 5120
             dit_config["n_layers"] = 5
-            if f"{key_prefix}nerf_blocks.0.norm.scale" in state_dict_keys: #Chroma Radiance
+
+            if any_suffix_in(state_dict_keys, key_prefix, 'nerf_blocks.0.norm.', ["weight", "scale"]): #Chroma Radiance
                 dit_config["image_model"] = "chroma_radiance"
                 dit_config["in_channels"] = 3
                 dit_config["out_channels"] = 3
@@ -259,7 +267,7 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
                 dit_config["nerf_depth"] = 4
                 dit_config["nerf_max_freqs"] = 8
                 dit_config["nerf_tile_size"] = 512
-                dit_config["nerf_final_head_type"] = "conv" if f"{key_prefix}nerf_final_layer_conv.norm.scale" in state_dict_keys else "linear"
+                dit_config["nerf_final_head_type"] = "conv" if any_suffix_in(state_dict_keys, key_prefix, 'nerf_final_layer_conv.norm.', ["weight", "scale"]) else "linear"
                 dit_config["nerf_embedder_dtype"] = torch.float32
                 if "{}__x0__".format(key_prefix) in state_dict_keys: # x0 pred
                     dit_config["use_x0"] = True
@@ -268,7 +276,7 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
         else:
             dit_config["guidance_embed"] = "{}guidance_in.in_layer.weight".format(key_prefix) in state_dict_keys
             dit_config["yak_mlp"] = '{}double_blocks.0.img_mlp.gate_proj.weight'.format(key_prefix) in state_dict_keys
-            dit_config["txt_norm"] = "{}txt_norm.scale".format(key_prefix) in state_dict_keys
+            dit_config["txt_norm"] = any_suffix_in(state_dict_keys, key_prefix, 'txt_norm.', ["weight", "scale"])
             if dit_config["yak_mlp"] and dit_config["txt_norm"]:  # Ovis model
                 dit_config["txt_ids_dims"] = [1, 2]
 
diff --git a/comfy/model_management.py b/comfy/model_management.py
index b6291f340..38c3e482b 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -19,7 +19,7 @@
 import psutil
 import logging
 from enum import Enum
-from comfy.cli_args import args, PerformanceFeature, enables_dynamic_vram
+from comfy.cli_args import args, PerformanceFeature
 import threading
 import torch
 import sys
@@ -55,6 +55,11 @@ cpu_state = CPUState.GPU
 
 total_vram = 0
 
+
+# Training Related State
+in_training = False
+
+
 def get_supported_float8_types():
     float8_types = []
     try:
@@ -651,7 +656,7 @@ def free_memory(memory_required, device, keep_loaded=[], for_dynamic=False, ram_
                 soft_empty_cache()
     return unloaded_models
 
-def load_models_gpu_orig(models, memory_required=0, force_patch_weights=False, minimum_memory_required=None, force_full_load=False):
+def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimum_memory_required=None, force_full_load=False):
     cleanup_models_gc()
     global vram_state
 
@@ -747,26 +752,6 @@ def load_models_gpu_orig(models, memory_required=0, force_patch_weights=False, m
         current_loaded_models.insert(0, loaded_model)
     return
 
-def load_models_gpu_thread(models, memory_required, force_patch_weights, minimum_memory_required, force_full_load):
-    with torch.inference_mode():
-        load_models_gpu_orig(models, memory_required, force_patch_weights, minimum_memory_required, force_full_load)
-        soft_empty_cache()
-
-def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimum_memory_required=None, force_full_load=False):
-    #Deliberately load models outside of the Aimdo mempool so they can be retained accross
-    #nodes. Use a dummy thread to do it as pytorch documents that mempool contexts are
-    #thread local. So exploit that to escape context
-    if enables_dynamic_vram():
-        t = threading.Thread(
-            target=load_models_gpu_thread,
-            args=(models, memory_required, force_patch_weights, minimum_memory_required, force_full_load)
-        )
-        t.start()
-        t.join()
-    else:
-        load_models_gpu_orig(models, memory_required=memory_required, force_patch_weights=force_patch_weights,
-                             minimum_memory_required=minimum_memory_required, force_full_load=force_full_load)
-
 def load_model_gpu(model):
     return load_models_gpu([model])
 
@@ -1226,21 +1211,20 @@ def cast_to(weight, dtype=None, device=None, non_blocking=False, copy=False, str
         if dtype is None:
             dtype = weight._model_dtype
 
-        r = torch.empty_like(weight, dtype=dtype, device=device)
-
         signature = comfy_aimdo.model_vbar.vbar_fault(weight._v)
         if signature is not None:
-            raw_tensor = comfy_aimdo.torch.aimdo_to_tensor(weight._v, device)
-            v_tensor = comfy.memory_management.interpret_gathered_like(cast_geometry, raw_tensor)[0]
-            if not comfy_aimdo.model_vbar.vbar_signature_compare(signature, weight._v_signature):
+            if comfy_aimdo.model_vbar.vbar_signature_compare(signature, weight._v_signature):
+                v_tensor = weight._v_tensor
+            else:
+                raw_tensor = comfy_aimdo.torch.aimdo_to_tensor(weight._v, device)
+                v_tensor = comfy.memory_management.interpret_gathered_like(cast_geometry, raw_tensor)[0]
+                weight._v_tensor = v_tensor
                 weight._v_signature = signature
                 #Send it over
                 v_tensor.copy_(weight, non_blocking=non_blocking)
-            #always take a deep copy even if _v is good, as we have no reasonable point to unpin
-            #a non comfy weight
-            r.copy_(v_tensor)
-            comfy_aimdo.model_vbar.vbar_unpin(weight._v)
-            return r
+            return v_tensor.to(dtype=dtype)
+
+        r = torch.empty_like(weight, dtype=dtype, device=device)
 
         if weight.dtype != r.dtype and weight.dtype != weight._model_dtype:
             #Offloaded casting could skip this, however it would make the quantizations
diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py
index d888dbcfb..f01818f50 100644
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@@ -19,7 +19,6 @@
 from __future__ import annotations
 
 import collections
-import copy
 import inspect
 import logging
 import math
@@ -317,7 +316,7 @@ class ModelPatcher:
 
         n.object_patches = self.object_patches.copy()
         n.weight_wrapper_patches = self.weight_wrapper_patches.copy()
-        n.model_options = copy.deepcopy(self.model_options)
+        n.model_options = comfy.utils.deepcopy_list_dict(self.model_options)
         n.backup = self.backup
         n.object_patches_backup = self.object_patches_backup
         n.parent = self
@@ -680,18 +679,19 @@ class ModelPatcher:
         for key in list(self.pinned):
             self.unpin_weight(key)
 
-    def _load_list(self, prio_comfy_cast_weights=False):
+    def _load_list(self, prio_comfy_cast_weights=False, default_device=None):
         loading = []
         for n, m in self.model.named_modules():
-            params = []
-            skip = False
-            for name, param in m.named_parameters(recurse=False):
-                params.append(name)
+            default = False
+            params = { name: param for name, param in m.named_parameters(recurse=False) }
             for name, param in m.named_parameters(recurse=True):
                 if name not in params:
-                    skip = True # skip random weights in non leaf modules
+                    default = True # default random weights in non leaf modules
                     break
-            if not skip and (hasattr(m, "comfy_cast_weights") or len(params) > 0):
+            if default and default_device is not None:
+                for param in params.values():
+                    param.data = param.data.to(device=default_device)
+            if not default and (hasattr(m, "comfy_cast_weights") or len(params) > 0):
                 module_mem = comfy.model_management.module_size(m)
                 module_offload_mem = module_mem
                 if hasattr(m, "comfy_cast_weights"):
@@ -1492,9 +1492,11 @@ class ModelPatcherDynamic(ModelPatcher):
             if vbar is not None:
                 vbar.prioritize()
 
-            #We have way more tools for acceleration on comfy weight offloading, so always
+            #We force reserve VRAM for the non comfy-weight so we dont have to deal
+            #with pin and unpin syncrhonization which can be expensive for small weights
+            #with a high layer rate (e.g. autoregressive LLMs).
             #prioritize the non-comfy weights (note the order reverse).
-            loading = self._load_list(prio_comfy_cast_weights=True)
+            loading = self._load_list(prio_comfy_cast_weights=True, default_device=device_to)
             loading.sort(reverse=True)
 
             for x in loading:
@@ -1512,8 +1514,10 @@ class ModelPatcherDynamic(ModelPatcher):
 
                     weight, _, _ = get_key_weight(self.model, key)
                     if weight is None:
-                        return 0
+                        return (False, 0)
                     if key in self.patches:
+                        if comfy.lora.calculate_shape(self.patches[key], weight, key) != weight.shape:
+                            return (True, 0)
                         setattr(m, param_key + "_lowvram_function", LowVramPatch(key, self.patches))
                         num_patches += 1
                     else:
@@ -1524,10 +1528,16 @@ class ModelPatcherDynamic(ModelPatcher):
                     setattr(m, param_key + "_function", weight_function)
                     geometry = weight
                     if not isinstance(weight, QuantizedTensor):
-                        model_dtype = getattr(m, param_key + "_comfy_model_dtype", weight.dtype)
+                        model_dtype = getattr(m, param_key + "_comfy_model_dtype", None) or weight.dtype
                         weight._model_dtype = model_dtype
                         geometry = comfy.memory_management.TensorGeometry(shape=weight.shape, dtype=model_dtype)
-                    return comfy.memory_management.vram_aligned_size(geometry)
+                    return (False, comfy.memory_management.vram_aligned_size(geometry))
+
+                def force_load_param(self, param_key, device_to):
+                    key = key_param_name_to_key(n, param_key)
+                    if key in self.backup:
+                        comfy.utils.set_attr_param(self.model, key, self.backup[key].weight)
+                    self.patch_weight_to_device(key, device_to=device_to)
 
                 if hasattr(m, "comfy_cast_weights"):
                     m.comfy_cast_weights = True
@@ -1535,13 +1545,19 @@ class ModelPatcherDynamic(ModelPatcher):
                     m.seed_key = n
                     set_dirty(m, dirty)
 
-                    v_weight_size = 0
-                    v_weight_size += setup_param(self, m, n, "weight")
-                    v_weight_size += setup_param(self, m, n, "bias")
+                    force_load, v_weight_size = setup_param(self, m, n, "weight")
+                    force_load_bias, v_weight_bias = setup_param(self, m, n, "bias")
+                    force_load = force_load or force_load_bias
+                    v_weight_size += v_weight_bias
 
-                    if vbar is not None and not hasattr(m, "_v"):
-                        m._v = vbar.alloc(v_weight_size)
-                    allocated_size += v_weight_size
+                    if force_load:
+                        logging.info(f"Module {n} has resizing Lora - force loading")
+                        force_load_param(self, "weight", device_to)
+                        force_load_param(self, "bias", device_to)
+                    else:
+                        if vbar is not None and not hasattr(m, "_v"):
+                            m._v = vbar.alloc(v_weight_size)
+                        allocated_size += v_weight_size
 
                 else:
                     for param in params:
@@ -1550,13 +1566,16 @@ class ModelPatcherDynamic(ModelPatcher):
                         weight.seed_key = key
                         set_dirty(weight, dirty)
                         geometry = weight
-                        model_dtype = getattr(m, param + "_comfy_model_dtype", weight.dtype)
+                        model_dtype = getattr(m, param + "_comfy_model_dtype", None) or weight.dtype
                         geometry = comfy.memory_management.TensorGeometry(shape=weight.shape, dtype=model_dtype)
                         weight_size = geometry.numel() * geometry.element_size()
                         if vbar is not None and not hasattr(weight, "_v"):
                             weight._v = vbar.alloc(weight_size)
                             weight._model_dtype = model_dtype
                         allocated_size += weight_size
+                    vbar.set_watermark_limit(allocated_size)
+
+                move_weight_functions(m, device_to)
 
             logging.info(f"Model {self.model.__class__.__name__} prepared for dynamic VRAM loading. {allocated_size // (1024 ** 2)}MB Staged. {num_patches} patches attached.")
 
@@ -1577,7 +1596,7 @@ class ModelPatcherDynamic(ModelPatcher):
         return 0 if vbar is None else vbar.free_memory(memory_to_free)
 
     def partially_unload_ram(self, ram_to_unload):
-        loading = self._load_list(prio_comfy_cast_weights=True)
+        loading = self._load_list(prio_comfy_cast_weights=True, default_device=self.offload_device)
         for x in loading:
             _, _, _, _, m, _ = x
             ram_to_unload -= comfy.pinned_memory.unpin_memory(m)
@@ -1598,6 +1617,13 @@ class ModelPatcherDynamic(ModelPatcher):
         if unpatch_weights:
             self.partially_unload_ram(1e32)
             self.partially_unload(None, 1e32)
+            for m in self.model.modules():
+                move_weight_functions(m, device_to)
+
+            keys = list(self.backup.keys())
+            for k in keys:
+                bk = self.backup[k]
+                comfy.utils.set_attr_param(self.model, k, bk.weight)
 
     def partially_load(self, device_to, extra_memory=0, force_patch_weights=False):
         assert not force_patch_weights #See above
diff --git a/comfy/ops.py b/comfy/ops.py
index 0f4eca7c7..688937e43 100644
--- a/comfy/ops.py
+++ b/comfy/ops.py
@@ -83,14 +83,18 @@ def cast_to_input(weight, input, non_blocking=False, copy=True):
 def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compute_dtype):
     offload_stream = None
     xfer_dest = None
-    cast_geometry = comfy.memory_management.tensors_to_geometries([ s.weight, s.bias ])
 
     signature = comfy_aimdo.model_vbar.vbar_fault(s._v)
-    if signature is not None:
-        xfer_dest = comfy_aimdo.torch.aimdo_to_tensor(s._v, device)
     resident = comfy_aimdo.model_vbar.vbar_signature_compare(signature, s._v_signature)
+    if signature is not None:
+        if resident:
+            weight = s._v_weight
+            bias = s._v_bias
+        else:
+            xfer_dest = comfy_aimdo.torch.aimdo_to_tensor(s._v, device)
 
     if not resident:
+        cast_geometry = comfy.memory_management.tensors_to_geometries([ s.weight, s.bias ])
         cast_dest = None
 
         xfer_source = [ s.weight, s.bias ]
@@ -140,9 +144,13 @@ def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compu
                     post_cast.copy_(pre_cast)
             xfer_dest = cast_dest
 
-    params = comfy.memory_management.interpret_gathered_like(cast_geometry, xfer_dest)
-    weight = params[0]
-    bias = params[1]
+        params = comfy.memory_management.interpret_gathered_like(cast_geometry, xfer_dest)
+        weight = params[0]
+        bias = params[1]
+        if signature is not None:
+            s._v_weight = weight
+            s._v_bias = bias
+        s._v_signature=signature
 
     def post_cast(s, param_key, x, dtype, resident, update_weight):
         lowvram_fn = getattr(s, param_key + "_lowvram_function", None)
@@ -169,8 +177,8 @@ def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compu
                 if orig.dtype == dtype and len(fns) == 0:
                     #The layer actually wants our freshly saved QT
                     x = y
-            else:
-                y = x
+            elif update_weight:
+                y = comfy.float.stochastic_rounding(x, orig.dtype, seed = comfy.utils.string_to_seed(s.seed_key))
             if update_weight:
                 orig.copy_(y)
         for f in fns:
@@ -182,7 +190,6 @@ def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compu
     weight = post_cast(s, "weight", weight, dtype, resident, update_weight)
     if s.bias is not None:
         bias = post_cast(s, "bias", bias, bias_dtype, resident, update_weight)
-    s._v_signature=signature
 
     #FIXME: weird offload return protocol
     return weight, bias, (offload_stream, device if signature is not None else None, None)
diff --git a/comfy/sampler_helpers.py b/comfy/sampler_helpers.py
index 9134e6d71..1f75f2ba7 100644
--- a/comfy/sampler_helpers.py
+++ b/comfy/sampler_helpers.py
@@ -122,20 +122,26 @@ def estimate_memory(model, noise_shape, conds):
     minimum_memory_required = model.model.memory_required([noise_shape[0]] + list(noise_shape[1:]), cond_shapes=cond_shapes_min)
     return memory_required, minimum_memory_required
 
-def prepare_sampling(model: ModelPatcher, noise_shape, conds, model_options=None, force_full_load=False):
+def prepare_sampling(model: ModelPatcher, noise_shape, conds, model_options=None, force_full_load=False, force_offload=False):
     executor = comfy.patcher_extension.WrapperExecutor.new_executor(
         _prepare_sampling,
         comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.PREPARE_SAMPLING, model_options, is_model_options=True)
     )
-    return executor.execute(model, noise_shape, conds, model_options=model_options, force_full_load=force_full_load)
+    return executor.execute(model, noise_shape, conds, model_options=model_options, force_full_load=force_full_load, force_offload=force_offload)
 
-def _prepare_sampling(model: ModelPatcher, noise_shape, conds, model_options=None, force_full_load=False):
+def _prepare_sampling(model: ModelPatcher, noise_shape, conds, model_options=None, force_full_load=False, force_offload=False):
     real_model: BaseModel = None
     models, inference_memory = get_additional_models(conds, model.model_dtype())
     models += get_additional_models_from_model_options(model_options)
     models += model.get_nested_additional_models()  # TODO: does this require inference_memory update?
-    memory_required, minimum_memory_required = estimate_memory(model, noise_shape, conds)
-    comfy.model_management.load_models_gpu([model] + models, memory_required=memory_required + inference_memory, minimum_memory_required=minimum_memory_required + inference_memory, force_full_load=force_full_load)
+    if force_offload: # In training + offload enabled, we want to force prepare sampling to trigger partial load
+        memory_required = 1e20
+        minimum_memory_required = None
+    else:
+        memory_required, minimum_memory_required = estimate_memory(model, noise_shape, conds)
+        memory_required += inference_memory
+        minimum_memory_required += inference_memory
+    comfy.model_management.load_models_gpu([model] + models, memory_required=memory_required, minimum_memory_required=minimum_memory_required, force_full_load=force_full_load)
     real_model = model.model
 
     return real_model, conds, models
diff --git a/comfy/sd.py b/comfy/sd.py
index bc9407405..f65e7cadd 100644
--- a/comfy/sd.py
+++ b/comfy/sd.py
@@ -793,8 +793,6 @@ class VAE:
             self.first_stage_model = AutoencoderKL(**(config['params']))
         self.first_stage_model = self.first_stage_model.eval()
 
-        model_management.archive_model_dtypes(self.first_stage_model)
-
         if device is None:
             device = model_management.vae_device()
         self.device = device
@@ -803,6 +801,7 @@ class VAE:
             dtype = model_management.vae_dtype(self.device, self.working_dtypes)
         self.vae_dtype = dtype
         self.first_stage_model.to(self.vae_dtype)
+        model_management.archive_model_dtypes(self.first_stage_model)
         self.output_device = model_management.intermediate_device()
 
         mp = comfy.model_patcher.CoreModelPatcher
diff --git a/comfy/sd1_clip.py b/comfy/sd1_clip.py
index 4c817d468..b564d1529 100644
--- a/comfy/sd1_clip.py
+++ b/comfy/sd1_clip.py
@@ -171,8 +171,9 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder):
 
     def process_tokens(self, tokens, device):
         end_token = self.special_tokens.get("end", None)
+        pad_token = self.special_tokens.get("pad", -1)
         if end_token is None:
-            cmp_token = self.special_tokens.get("pad", -1)
+            cmp_token = pad_token
         else:
             cmp_token = end_token
 
@@ -186,15 +187,21 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder):
             other_embeds = []
             eos = False
             index = 0
+            left_pad = False
             for y in x:
                 if isinstance(y, numbers.Integral):
-                    if eos:
+                    token = int(y)
+                    if index == 0 and token == pad_token:
+                        left_pad = True
+
+                    if eos or (left_pad and token == pad_token):
                         attention_mask.append(0)
                     else:
                         attention_mask.append(1)
-                    token = int(y)
+                        left_pad = False
+
                     tokens_temp += [token]
-                    if not eos and token == cmp_token:
+                    if not eos and token == cmp_token and not left_pad:
                         if end_token is None:
                             attention_mask[-1] = 0
                         eos = True
diff --git a/comfy/supported_models.py b/comfy/supported_models.py
index d33db7507..c28be1716 100644
--- a/comfy/supported_models.py
+++ b/comfy/supported_models.py
@@ -710,6 +710,15 @@ class Flux(supported_models_base.BASE):
 
     supported_inference_dtypes = [torch.bfloat16, torch.float16, torch.float32]
 
+    def process_unet_state_dict(self, state_dict):
+        out_sd = {}
+        for k in list(state_dict.keys()):
+            key_out = k
+            if key_out.endswith("_norm.scale"):
+                key_out = "{}.weight".format(key_out[:-len(".scale")])
+            out_sd[key_out] = state_dict[k]
+        return out_sd
+
     vae_key_prefix = ["vae."]
     text_encoder_key_prefix = ["text_encoders."]
 
@@ -898,11 +907,13 @@ class HunyuanVideo(supported_models_base.BASE):
             key_out = key_out.replace("txt_in.c_embedder.linear_1.", "txt_in.c_embedder.in_layer.").replace("txt_in.c_embedder.linear_2.", "txt_in.c_embedder.out_layer.")
             key_out = key_out.replace("_mod.linear.", "_mod.lin.").replace("_attn_qkv.", "_attn.qkv.")
             key_out = key_out.replace("mlp.fc1.", "mlp.0.").replace("mlp.fc2.", "mlp.2.")
-            key_out = key_out.replace("_attn_q_norm.weight", "_attn.norm.query_norm.scale").replace("_attn_k_norm.weight", "_attn.norm.key_norm.scale")
-            key_out = key_out.replace(".q_norm.weight", ".norm.query_norm.scale").replace(".k_norm.weight", ".norm.key_norm.scale")
+            key_out = key_out.replace("_attn_q_norm.weight", "_attn.norm.query_norm.weight").replace("_attn_k_norm.weight", "_attn.norm.key_norm.weight")
+            key_out = key_out.replace(".q_norm.weight", ".norm.query_norm.weight").replace(".k_norm.weight", ".norm.key_norm.weight")
             key_out = key_out.replace("_attn_proj.", "_attn.proj.")
             key_out = key_out.replace(".modulation.linear.", ".modulation.lin.")
             key_out = key_out.replace("_in.mlp.2.", "_in.out_layer.").replace("_in.mlp.0.", "_in.in_layer.")
+            if key_out.endswith(".scale"):
+                key_out = "{}.weight".format(key_out[:-len(".scale")])
             out_sd[key_out] = state_dict[k]
         return out_sd
 
@@ -1264,6 +1275,15 @@ class Hunyuan3Dv2(supported_models_base.BASE):
 
     latent_format = latent_formats.Hunyuan3Dv2
 
+    def process_unet_state_dict(self, state_dict):
+        out_sd = {}
+        for k in list(state_dict.keys()):
+            key_out = k
+            if key_out.endswith(".scale"):
+                key_out = "{}.weight".format(key_out[:-len(".scale")])
+            out_sd[key_out] = state_dict[k]
+        return out_sd
+
     def process_unet_state_dict_for_saving(self, state_dict):
         replace_prefix = {"": "model."}
         return utils.state_dict_prefix_replace(state_dict, replace_prefix)
@@ -1341,6 +1361,14 @@ class Chroma(supported_models_base.BASE):
 
     supported_inference_dtypes = [torch.bfloat16, torch.float16, torch.float32]
 
+    def process_unet_state_dict(self, state_dict):
+        out_sd = {}
+        for k in list(state_dict.keys()):
+            key_out = k
+            if key_out.endswith(".scale"):
+                key_out = "{}.weight".format(key_out[:-len(".scale")])
+            out_sd[key_out] = state_dict[k]
+        return out_sd
 
     def get_model(self, state_dict, prefix="", device=None):
         out = model_base.Chroma(self, device=device)
diff --git a/comfy/text_encoders/ace15.py b/comfy/text_encoders/ace15.py
index 00dd5ba90..f135d74c1 100644
--- a/comfy/text_encoders/ace15.py
+++ b/comfy/text_encoders/ace15.py
@@ -10,12 +10,12 @@ import comfy.utils
 def sample_manual_loop_no_classes(
     model,
     ids=None,
-    paddings=[],
     execution_dtype=None,
     cfg_scale: float = 2.0,
     temperature: float = 0.85,
     top_p: float = 0.9,
     top_k: int = None,
+    min_p: float = 0.000,
     seed: int = 1,
     min_tokens: int = 1,
     max_new_tokens: int = 2048,
@@ -23,6 +23,8 @@ def sample_manual_loop_no_classes(
     audio_end_id: int = 215669,
     eos_token_id: int = 151645,
 ):
+    if ids is None:
+        return []
     device = model.execution_device
 
     if execution_dtype is None:
@@ -32,31 +34,34 @@ def sample_manual_loop_no_classes(
             execution_dtype = torch.float32
 
     embeds, attention_mask, num_tokens, embeds_info = model.process_tokens(ids, device)
-    for i, t in enumerate(paddings):
-        attention_mask[i, :t] = 0
-        attention_mask[i, t:] = 1
+    embeds_batch = embeds.shape[0]
 
     output_audio_codes = []
     past_key_values = []
     generator = torch.Generator(device=device)
     generator.manual_seed(seed)
     model_config = model.transformer.model.config
+    past_kv_shape = [embeds_batch, model_config.num_key_value_heads, embeds.shape[1] + min_tokens, model_config.head_dim]
 
     for x in range(model_config.num_hidden_layers):
-        past_key_values.append((torch.empty([embeds.shape[0], model_config.num_key_value_heads, embeds.shape[1] + min_tokens, model_config.head_dim], device=device, dtype=execution_dtype), torch.empty([embeds.shape[0], model_config.num_key_value_heads, embeds.shape[1] + min_tokens, model_config.head_dim], device=device, dtype=execution_dtype), 0))
+        past_key_values.append((torch.empty(past_kv_shape, device=device, dtype=execution_dtype), torch.empty(past_kv_shape, device=device, dtype=execution_dtype), 0))
 
     progress_bar = comfy.utils.ProgressBar(max_new_tokens)
 
-    for step in range(max_new_tokens):
+    for step in comfy.utils.model_trange(max_new_tokens, desc="LM sampling"):
         outputs = model.transformer(None, attention_mask, embeds=embeds.to(execution_dtype), num_tokens=num_tokens, intermediate_output=None, dtype=execution_dtype, embeds_info=embeds_info, past_key_values=past_key_values)
         next_token_logits = model.transformer.logits(outputs[0])[:, -1]
         past_key_values = outputs[2]
 
-        cond_logits = next_token_logits[0:1]
-        uncond_logits = next_token_logits[1:2]
-        cfg_logits = uncond_logits + cfg_scale * (cond_logits - uncond_logits)
+        if cfg_scale != 1.0:
+            cond_logits = next_token_logits[0:1]
+            uncond_logits = next_token_logits[1:2]
+            cfg_logits = uncond_logits + cfg_scale * (cond_logits - uncond_logits)
+        else:
+            cfg_logits = next_token_logits[0:1]
 
-        if eos_token_id is not None and eos_token_id < audio_start_id and min_tokens < step:
+        use_eos_score = eos_token_id is not None and eos_token_id < audio_start_id and min_tokens < step
+        if use_eos_score:
             eos_score = cfg_logits[:, eos_token_id].clone()
 
         remove_logit_value = torch.finfo(cfg_logits.dtype).min
@@ -64,7 +69,7 @@ def sample_manual_loop_no_classes(
         cfg_logits[:, :audio_start_id] = remove_logit_value
         cfg_logits[:, audio_end_id:] = remove_logit_value
 
-        if eos_token_id is not None and eos_token_id < audio_start_id and min_tokens < step:
+        if use_eos_score:
             cfg_logits[:, eos_token_id] = eos_score
 
         if top_k is not None and top_k > 0:
@@ -72,6 +77,12 @@ def sample_manual_loop_no_classes(
             min_val = top_k_vals[..., -1, None]
             cfg_logits[cfg_logits < min_val] = remove_logit_value
 
+        if min_p is not None and min_p > 0:
+            probs = torch.softmax(cfg_logits, dim=-1)
+            p_max = probs.max(dim=-1, keepdim=True).values
+            indices_to_remove = probs < (min_p * p_max)
+            cfg_logits[indices_to_remove] = remove_logit_value
+
         if top_p is not None and top_p < 1.0:
             sorted_logits, sorted_indices = torch.sort(cfg_logits, descending=True)
             cumulative_probs = torch.cumsum(torch.softmax(sorted_logits, dim=-1), dim=-1)
@@ -93,8 +104,8 @@ def sample_manual_loop_no_classes(
             break
 
         embed, _, _, _ = model.process_tokens([[token]], device)
-        embeds = embed.repeat(2, 1, 1)
-        attention_mask = torch.cat([attention_mask, torch.ones((2, 1), device=device, dtype=attention_mask.dtype)], dim=1)
+        embeds = embed.repeat(embeds_batch, 1, 1)
+        attention_mask = torch.cat([attention_mask, torch.ones((embeds_batch, 1), device=device, dtype=attention_mask.dtype)], dim=1)
 
         output_audio_codes.append(token - audio_start_id)
         progress_bar.update_absolute(step)
@@ -102,24 +113,29 @@ def sample_manual_loop_no_classes(
     return output_audio_codes
 
 
-def generate_audio_codes(model, positive, negative, min_tokens=1, max_tokens=1024, seed=0, cfg_scale=2.0, temperature=0.85, top_p=0.9, top_k=0):
+def generate_audio_codes(model, positive, negative, min_tokens=1, max_tokens=1024, seed=0, cfg_scale=2.0, temperature=0.85, top_p=0.9, top_k=0, min_p=0.000):
     positive = [[token for token, _ in inner_list] for inner_list in positive]
-    negative = [[token for token, _ in inner_list] for inner_list in negative]
     positive = positive[0]
-    negative = negative[0]
 
-    neg_pad = 0
-    if len(negative) < len(positive):
-        neg_pad = (len(positive) - len(negative))
-        negative = [model.special_tokens["pad"]] * neg_pad + negative
+    if cfg_scale != 1.0:
+        negative = [[token for token, _ in inner_list] for inner_list in negative]
+        negative = negative[0]
 
-    pos_pad = 0
-    if len(negative) > len(positive):
-        pos_pad = (len(negative) - len(positive))
-        positive = [model.special_tokens["pad"]] * pos_pad + positive
+        neg_pad = 0
+        if len(negative) < len(positive):
+            neg_pad = (len(positive) - len(negative))
+            negative = [model.special_tokens["pad"]] * neg_pad + negative
 
-    paddings = [pos_pad, neg_pad]
-    return sample_manual_loop_no_classes(model, [positive, negative], paddings, cfg_scale=cfg_scale, temperature=temperature, top_p=top_p, top_k=top_k, seed=seed, min_tokens=min_tokens, max_new_tokens=max_tokens)
+        pos_pad = 0
+        if len(negative) > len(positive):
+            pos_pad = (len(negative) - len(positive))
+            positive = [model.special_tokens["pad"]] * pos_pad + positive
+
+        ids = [positive, negative]
+    else:
+        ids = [positive]
+
+    return sample_manual_loop_no_classes(model, ids, cfg_scale=cfg_scale, temperature=temperature, top_p=top_p, top_k=top_k, min_p=min_p, seed=seed, min_tokens=min_tokens, max_new_tokens=max_tokens)
 
 
 class ACE15Tokenizer(sd1_clip.SD1Tokenizer):
@@ -129,12 +145,12 @@ class ACE15Tokenizer(sd1_clip.SD1Tokenizer):
     def _metas_to_cot(self, *, return_yaml: bool = False, **kwargs) -> str:
         user_metas = {
             k: kwargs.pop(k)
-            for k in ("bpm", "duration", "keyscale", "timesignature", "language", "caption")
+            for k in ("bpm", "duration", "keyscale", "timesignature")
             if k in kwargs
         }
         timesignature = user_metas.get("timesignature")
         if isinstance(timesignature, str) and timesignature.endswith("/4"):
-            user_metas["timesignature"] = timesignature.rsplit("/", 1)[0]
+            user_metas["timesignature"] = timesignature[:-2]
         user_metas = {
             k: v if not isinstance(v, str) or not v.isdigit() else int(v)
             for k, v in user_metas.items()
@@ -147,8 +163,11 @@ class ACE15Tokenizer(sd1_clip.SD1Tokenizer):
         return f"<think>\n{meta_yaml}\n</think>" if not return_yaml else meta_yaml
 
     def _metas_to_cap(self, **kwargs) -> str:
-        use_keys = ("bpm", "duration", "keyscale", "timesignature")
+        use_keys = ("bpm", "timesignature", "keyscale", "duration")
         user_metas = { k: kwargs.pop(k, "N/A") for k in use_keys }
+        timesignature = user_metas.get("timesignature")
+        if isinstance(timesignature, str) and timesignature.endswith("/4"):
+            user_metas["timesignature"] = timesignature[:-2]
         duration = user_metas["duration"]
         if duration == "N/A":
             user_metas["duration"] = "30 seconds"
@@ -159,9 +178,13 @@ class ACE15Tokenizer(sd1_clip.SD1Tokenizer):
         return "\n".join(f"- {k}: {user_metas[k]}" for k in use_keys)
 
     def tokenize_with_weights(self, text, return_word_ids=False, **kwargs):
-        out = {}
+        text = text.strip()
+        text_negative = kwargs.get("caption_negative", text).strip()
         lyrics = kwargs.get("lyrics", "")
+        lyrics_negative = kwargs.get("lyrics_negative", lyrics)
         duration = kwargs.get("duration", 120)
+        if isinstance(duration, str):
+            duration = float(duration.split(None, 1)[0])
         language = kwargs.get("language")
         seed = kwargs.get("seed", 0)
 
@@ -170,28 +193,55 @@ class ACE15Tokenizer(sd1_clip.SD1Tokenizer):
         temperature = kwargs.get("temperature", 0.85)
         top_p = kwargs.get("top_p", 0.9)
         top_k = kwargs.get("top_k", 0.0)
-
+        min_p = kwargs.get("min_p", 0.000)
 
         duration = math.ceil(duration)
         kwargs["duration"] = duration
+        tokens_duration = duration * 5
+        min_tokens = int(kwargs.get("min_tokens", tokens_duration))
+        max_tokens = int(kwargs.get("max_tokens", tokens_duration))
 
-        cot_text = self._metas_to_cot(caption = text, **kwargs)
+        metas_negative = {
+            k.rsplit("_", 1)[0]: kwargs.pop(k)
+            for k in ("bpm_negative", "duration_negative", "keyscale_negative", "timesignature_negative", "language_negative", "caption_negative")
+            if k in kwargs
+        }
+        if not kwargs.get("use_negative_caption"):
+            _ = metas_negative.pop("caption", None)
+
+        cot_text = self._metas_to_cot(caption=text, **kwargs)
+        cot_text_negative = "<think>\n\n</think>" if not metas_negative else self._metas_to_cot(**metas_negative)
         meta_cap = self._metas_to_cap(**kwargs)
 
-        lm_template = "<|im_start|>system\n# Instruction\nGenerate audio semantic tokens based on the given conditions:\n\n<|im_end|>\n<|im_start|>user\n# Caption\n{}\n# Lyric\n{}\n<|im_end|>\n<|im_start|>assistant\n{}\n<|im_end|>\n"
+        lm_template = "<|im_start|>system\n# Instruction\nGenerate audio semantic tokens based on the given conditions:\n\n<|im_end|>\n<|im_start|>user\n# Caption\n{}\n\n# Lyric\n{}\n<|im_end|>\n<|im_start|>assistant\n{}\n\n<|im_end|>\n"
+        lyrics_template = "# Languages\n{}\n\n# Lyric\n{}<|endoftext|><|endoftext|>"
+        qwen3_06b_template = "# Instruction\nGenerate audio semantic tokens based on the given conditions:\n\n# Caption\n{}\n\n# Metas\n{}\n<|endoftext|>\n<|endoftext|>"
 
-        out["lm_prompt"] = self.qwen3_06b.tokenize_with_weights(lm_template.format(text, lyrics, cot_text), disable_weights=True)
-        out["lm_prompt_negative"] = self.qwen3_06b.tokenize_with_weights(lm_template.format(text, lyrics, "<think>\n</think>"), disable_weights=True)
+        llm_prompts = {
+            "lm_prompt": lm_template.format(text, lyrics.strip(), cot_text),
+            "lm_prompt_negative": lm_template.format(text_negative, lyrics_negative.strip(), cot_text_negative),
+            "lyrics": lyrics_template.format(language if language is not None else "", lyrics),
+            "qwen3_06b": qwen3_06b_template.format(text, meta_cap),
+        }
 
-        out["lyrics"] = self.qwen3_06b.tokenize_with_weights("# Languages\n{}\n\n# Lyric\n{}<|endoftext|><|endoftext|>".format(language if language is not None else "", lyrics), return_word_ids, disable_weights=True, **kwargs)
-        out["qwen3_06b"] = self.qwen3_06b.tokenize_with_weights("# Instruction\nGenerate audio semantic tokens based on the given conditions:\n\n# Caption\n{}\n# Metas\n{}\n<|endoftext|>\n<|endoftext|>".format(text, meta_cap), return_word_ids, **kwargs)
-        out["lm_metadata"] = {"min_tokens": duration * 5,
+        out = {
+            prompt_key: self.qwen3_06b.tokenize_with_weights(
+                prompt,
+                prompt_key == "qwen3_06b" and return_word_ids,
+                disable_weights = True,
+                **kwargs,
+            )
+            for prompt_key, prompt in llm_prompts.items()
+        }
+        out["lm_metadata"] = {"min_tokens": min_tokens,
+                              "max_tokens": max_tokens,
                               "seed": seed,
                               "generate_audio_codes": generate_audio_codes,
                               "cfg_scale": cfg_scale,
                               "temperature": temperature,
                               "top_p": top_p,
                               "top_k": top_k,
+                              "min_p": min_p,
                               }
         return out
 
@@ -252,7 +302,7 @@ class ACE15TEModel(torch.nn.Module):
 
         lm_metadata = token_weight_pairs["lm_metadata"]
         if lm_metadata["generate_audio_codes"]:
-            audio_codes = generate_audio_codes(getattr(self, self.lm_model, self.qwen3_06b), token_weight_pairs["lm_prompt"], token_weight_pairs["lm_prompt_negative"], min_tokens=lm_metadata["min_tokens"], max_tokens=lm_metadata["min_tokens"], seed=lm_metadata["seed"], cfg_scale=lm_metadata["cfg_scale"], temperature=lm_metadata["temperature"], top_p=lm_metadata["top_p"], top_k=lm_metadata["top_k"])
+            audio_codes = generate_audio_codes(getattr(self, self.lm_model, self.qwen3_06b), token_weight_pairs["lm_prompt"], token_weight_pairs["lm_prompt_negative"], min_tokens=lm_metadata["min_tokens"], max_tokens=lm_metadata["min_tokens"], seed=lm_metadata["seed"], cfg_scale=lm_metadata["cfg_scale"], temperature=lm_metadata["temperature"], top_p=lm_metadata["top_p"], top_k=lm_metadata["top_k"], min_p=lm_metadata["min_p"])
             out["audio_codes"] = [audio_codes]
 
         return base_out, None, out
diff --git a/comfy/text_encoders/llama.py b/comfy/text_encoders/llama.py
index b6735d210..54f3d5595 100644
--- a/comfy/text_encoders/llama.py
+++ b/comfy/text_encoders/llama.py
@@ -355,13 +355,6 @@ class RMSNorm(nn.Module):
 
 
 
-def rotate_half(x):
-    """Rotates half the hidden dims of the input."""
-    x1 = x[..., : x.shape[-1] // 2]
-    x2 = x[..., x.shape[-1] // 2 :]
-    return torch.cat((-x2, x1), dim=-1)
-
-
 def precompute_freqs_cis(head_dim, position_ids, theta, rope_scale=None, rope_dims=None, device=None):
     if not isinstance(theta, list):
         theta = [theta]
@@ -390,20 +383,30 @@ def precompute_freqs_cis(head_dim, position_ids, theta, rope_scale=None, rope_di
         else:
             cos = cos.unsqueeze(1)
             sin = sin.unsqueeze(1)
-        out.append((cos, sin))
+        sin_split = sin.shape[-1] // 2
+        out.append((cos, sin[..., : sin_split], -sin[..., sin_split :]))
 
     if len(out) == 1:
         return out[0]
 
     return out
 
-
 def apply_rope(xq, xk, freqs_cis):
     org_dtype = xq.dtype
     cos = freqs_cis[0]
     sin = freqs_cis[1]
-    q_embed = (xq * cos) + (rotate_half(xq) * sin)
-    k_embed = (xk * cos) + (rotate_half(xk) * sin)
+    nsin = freqs_cis[2]
+
+    q_embed = (xq * cos)
+    q_split = q_embed.shape[-1] // 2
+    q_embed[..., : q_split].addcmul_(xq[..., q_split :], nsin)
+    q_embed[..., q_split :].addcmul_(xq[..., : q_split], sin)
+
+    k_embed = (xk * cos)
+    k_split = k_embed.shape[-1] // 2
+    k_embed[..., : k_split].addcmul_(xk[..., k_split :], nsin)
+    k_embed[..., k_split :].addcmul_(xk[..., : k_split], sin)
+
     return q_embed.to(org_dtype), k_embed.to(org_dtype)
 
 
diff --git a/comfy/text_encoders/lt.py b/comfy/text_encoders/lt.py
index 3f87dfd6a..9cf87c0b2 100644
--- a/comfy/text_encoders/lt.py
+++ b/comfy/text_encoders/lt.py
@@ -25,7 +25,7 @@ def ltxv_te(*args, **kwargs):
 class Gemma3_12BTokenizer(sd1_clip.SDTokenizer):
     def __init__(self, embedding_directory=None, tokenizer_data={}):
         tokenizer = tokenizer_data.get("spiece_model", None)
-        super().__init__(tokenizer, pad_with_end=False, embedding_size=3840, embedding_key='gemma3_12b', tokenizer_class=SPieceTokenizer, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=1, disable_weights=True, tokenizer_args={"add_bos": True, "add_eos": False}, tokenizer_data=tokenizer_data)
+        super().__init__(tokenizer, pad_with_end=False, embedding_size=3840, embedding_key='gemma3_12b', tokenizer_class=SPieceTokenizer, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=512, pad_left=True, disable_weights=True, tokenizer_args={"add_bos": True, "add_eos": False}, tokenizer_data=tokenizer_data)
 
     def state_dict(self):
         return {"spiece_model": self.tokenizer.serialize_model()}
@@ -97,6 +97,7 @@ class LTXAVTEModel(torch.nn.Module):
         token_weight_pairs = token_weight_pairs["gemma3_12b"]
 
         out, pooled, extra = self.gemma3_12b.encode_token_weights(token_weight_pairs)
+        out = out[:, :, -torch.sum(extra["attention_mask"]).item():]
         out_device = out.device
         if comfy.model_management.should_use_bf16(self.execution_device):
             out = out.to(device=self.execution_device, dtype=torch.bfloat16)
@@ -138,6 +139,7 @@ class LTXAVTEModel(torch.nn.Module):
 
         token_weight_pairs = token_weight_pairs.get("gemma3_12b", [])
         num_tokens = sum(map(lambda a: len(a), token_weight_pairs))
+        num_tokens = max(num_tokens, 64)
         return num_tokens * constant * 1024 * 1024
 
 def ltxav_te(dtype_llama=None, llama_quantization_metadata=None):
diff --git a/comfy/utils.py b/comfy/utils.py
index 1337e2205..c1ce540b5 100644
--- a/comfy/utils.py
+++ b/comfy/utils.py
@@ -20,13 +20,14 @@
 import torch
 import math
 import struct
-import comfy.checkpoint_pickle
+import comfy.memory_management
 import safetensors.torch
 import numpy as np
 from PIL import Image
 import logging
 import itertools
 from torch.nn.functional import interpolate
+from tqdm.auto import trange
 from einops import rearrange
 from comfy.cli_args import args, enables_dynamic_vram
 import json
@@ -37,26 +38,26 @@ import warnings
 MMAP_TORCH_FILES = args.mmap_torch_files
 DISABLE_MMAP = args.disable_mmap
 
-ALWAYS_SAFE_LOAD = False
-if hasattr(torch.serialization, "add_safe_globals"):  # TODO: this was added in pytorch 2.4, the unsafe path should be removed once earlier versions are deprecated
+
+if True:  # ckpt/pt file whitelist for safe loading of old sd files
     class ModelCheckpoint:
         pass
     ModelCheckpoint.__module__ = "pytorch_lightning.callbacks.model_checkpoint"
 
     def scalar(*args, **kwargs):
-        from numpy.core.multiarray import scalar as sc
-        return sc(*args, **kwargs)
+        return None
     scalar.__module__ = "numpy.core.multiarray"
 
     from numpy import dtype
     from numpy.dtypes import Float64DType
-    from _codecs import encode
+
+    def encode(*args, **kwargs):  # no longer necessary on newer torch
+        return None
+    encode.__module__ = "_codecs"
 
     torch.serialization.add_safe_globals([ModelCheckpoint, scalar, dtype, Float64DType, encode])
-    ALWAYS_SAFE_LOAD = True
     logging.info("Checkpoint files will always be loaded safely.")
-else:
-    logging.warning("Warning, you are using an old pytorch version and some ckpt/pt files might be loaded unsafely. Upgrading to 2.4 or above is recommended as older versions of pytorch are no longer supported.")
+
 
 # Current as of safetensors 0.7.0
 _TYPES = {
@@ -139,11 +140,8 @@ def load_torch_file(ckpt, safe_load=False, device=None, return_metadata=False):
         if MMAP_TORCH_FILES:
             torch_args["mmap"] = True
 
-        if safe_load or ALWAYS_SAFE_LOAD:
-            pl_sd = torch.load(ckpt, map_location=device, weights_only=True, **torch_args)
-        else:
-            logging.warning("WARNING: loading {} unsafely, upgrade your pytorch to 2.4 or newer to load this file safely.".format(ckpt))
-            pl_sd = torch.load(ckpt, map_location=device, pickle_module=comfy.checkpoint_pickle)
+        pl_sd = torch.load(ckpt, map_location=device, weights_only=True, **torch_args)
+
         if "state_dict" in pl_sd:
             sd = pl_sd["state_dict"]
         else:
@@ -674,10 +672,10 @@ def flux_to_diffusers(mmdit_config, output_prefix=""):
                         "ff_context.linear_in.bias": "txt_mlp.0.bias",
                         "ff_context.linear_out.weight": "txt_mlp.2.weight",
                         "ff_context.linear_out.bias": "txt_mlp.2.bias",
-                        "attn.norm_q.weight": "img_attn.norm.query_norm.scale",
-                        "attn.norm_k.weight": "img_attn.norm.key_norm.scale",
-                        "attn.norm_added_q.weight": "txt_attn.norm.query_norm.scale",
-                        "attn.norm_added_k.weight": "txt_attn.norm.key_norm.scale",
+                        "attn.norm_q.weight": "img_attn.norm.query_norm.weight",
+                        "attn.norm_k.weight": "img_attn.norm.key_norm.weight",
+                        "attn.norm_added_q.weight": "txt_attn.norm.query_norm.weight",
+                        "attn.norm_added_k.weight": "txt_attn.norm.key_norm.weight",
                     }
 
         for k in block_map:
@@ -700,8 +698,8 @@ def flux_to_diffusers(mmdit_config, output_prefix=""):
                         "norm.linear.bias": "modulation.lin.bias",
                         "proj_out.weight": "linear2.weight",
                         "proj_out.bias": "linear2.bias",
-                        "attn.norm_q.weight": "norm.query_norm.scale",
-                        "attn.norm_k.weight": "norm.key_norm.scale",
+                        "attn.norm_q.weight": "norm.query_norm.weight",
+                        "attn.norm_k.weight": "norm.key_norm.weight",
                         "attn.to_qkv_mlp_proj.weight": "linear1.weight", # Flux 2
                         "attn.to_out.weight": "linear2.weight", # Flux 2
                     }
@@ -1155,6 +1153,32 @@ def tiled_scale_multidim(samples, function, tile=(64, 64), overlap=8, upscale_am
 def tiled_scale(samples, function, tile_x=64, tile_y=64, overlap = 8, upscale_amount = 4, out_channels = 3, output_device="cpu", pbar = None):
     return tiled_scale_multidim(samples, function, (tile_y, tile_x), overlap=overlap, upscale_amount=upscale_amount, out_channels=out_channels, output_device=output_device, pbar=pbar)
 
+def model_trange(*args, **kwargs):
+    if comfy.memory_management.aimdo_allocator is None:
+        return trange(*args, **kwargs)
+
+    pbar = trange(*args, **kwargs, smoothing=1.0)
+    pbar._i = 0
+    pbar.set_postfix_str("  Model Initializing ...  ")
+
+    _update = pbar.update
+
+    def warmup_update(n=1):
+        pbar._i += 1
+        if pbar._i == 1:
+            pbar.i1_time = time.time()
+            pbar.set_postfix_str(" Model Initialization complete!  ")
+        elif pbar._i == 2:
+            #bring forward the effective start time based the the diff between first and second iteration
+            #to attempt to remove load overhead from the final step rate estimate.
+            pbar.start_t = pbar.i1_time - (time.time() - pbar.i1_time)
+            pbar.set_postfix_str("")
+
+        _update(n)
+
+    pbar.update = warmup_update
+    return pbar
+
 PROGRESS_BAR_ENABLED = True
 def set_progress_bar_enabled(enabled):
     global PROGRESS_BAR_ENABLED
@@ -1376,3 +1400,21 @@ def string_to_seed(data):
             else:
                 crc >>= 1
     return crc ^ 0xFFFFFFFF
+
+def deepcopy_list_dict(obj, memo=None):
+    if memo is None:
+        memo = {}
+
+    obj_id = id(obj)
+    if obj_id in memo:
+        return memo[obj_id]
+
+    if isinstance(obj, dict):
+        res = {deepcopy_list_dict(k, memo): deepcopy_list_dict(v, memo) for k, v in obj.items()}
+    elif isinstance(obj, list):
+        res = [deepcopy_list_dict(i, memo) for i in obj]
+    else:
+        res = obj
+
+    memo[obj_id] = res
+    return res
diff --git a/comfy/weight_adapter/base.py b/comfy/weight_adapter/base.py
index bce89a0e2..d352e066b 100644
--- a/comfy/weight_adapter/base.py
+++ b/comfy/weight_adapter/base.py
@@ -49,6 +49,12 @@ class WeightAdapterBase:
         """
         raise NotImplementedError
 
+    def calculate_shape(
+        self,
+        key
+    ):
+        return None
+
     def calculate_weight(
         self,
         weight,
diff --git a/comfy/weight_adapter/bypass.py b/comfy/weight_adapter/bypass.py
index d4aaf98ca..b9d5ec7d9 100644
--- a/comfy/weight_adapter/bypass.py
+++ b/comfy/weight_adapter/bypass.py
@@ -21,6 +21,7 @@ from typing import Optional, Union
 import torch
 import torch.nn as nn
 
+import comfy.model_management
 from .base import WeightAdapterBase, WeightAdapterTrainBase
 from comfy.patcher_extension import PatcherInjection
 
@@ -181,18 +182,21 @@ class BypassForwardHook:
             )
             return  # Already injected
 
-        # Move adapter weights to module's device to avoid CPU-GPU transfer on every forward
-        device = None
+        # Move adapter weights to compute device (GPU)
+        # Use get_torch_device() instead of module.weight.device because
+        # with offloading, module weights may be on CPU while compute happens on GPU
+        device = comfy.model_management.get_torch_device()
+
+        # Get dtype from module weight if available
         dtype = None
         if hasattr(self.module, "weight") and self.module.weight is not None:
-            device = self.module.weight.device
             dtype = self.module.weight.dtype
-        elif hasattr(self.module, "W_q"):  # Quantized layers might use different attr
-            device = self.module.W_q.device
-            dtype = self.module.W_q.dtype
 
-        if device is not None:
-            self._move_adapter_weights_to_device(device, dtype)
+        # Only use dtype if it's a standard float type, not quantized
+        if dtype is not None and dtype not in (torch.float32, torch.float16, torch.bfloat16):
+            dtype = None
+
+        self._move_adapter_weights_to_device(device, dtype)
 
         self.original_forward = self.module.forward
         self.module.forward = self._bypass_forward
diff --git a/comfy/weight_adapter/lora.py b/comfy/weight_adapter/lora.py
index bc4260a8f..8e1261a12 100644
--- a/comfy/weight_adapter/lora.py
+++ b/comfy/weight_adapter/lora.py
@@ -214,6 +214,13 @@ class LoRAAdapter(WeightAdapterBase):
         else:
             return None
 
+    def calculate_shape(
+        self,
+        key
+    ):
+        reshape = self.weights[5]
+        return tuple(reshape) if reshape is not None else None
+
     def calculate_weight(
         self,
         weight,
diff --git a/comfy_api/feature_flags.py b/comfy_api/feature_flags.py
index de167f037..a90a5ca40 100644
--- a/comfy_api/feature_flags.py
+++ b/comfy_api/feature_flags.py
@@ -14,6 +14,7 @@ SERVER_FEATURE_FLAGS: dict[str, Any] = {
     "supports_preview_metadata": True,
     "max_upload_size": args.max_upload_size * 1024 * 1024, # Convert MB to bytes
     "extension": {"manager": {"supports_v4": True}},
+    "node_replacements": True,
 }
 
 
diff --git a/comfy_api/latest/__init__.py b/comfy_api/latest/__init__.py
index 8542a1dbc..f2399422b 100644
--- a/comfy_api/latest/__init__.py
+++ b/comfy_api/latest/__init__.py
@@ -21,6 +21,17 @@ class ComfyAPI_latest(ComfyAPIBase):
     VERSION = "latest"
     STABLE = False
 
+    def __init__(self):
+        super().__init__()
+        self.node_replacement = self.NodeReplacement()
+        self.execution = self.Execution()
+
+    class NodeReplacement(ProxiedSingleton):
+        async def register(self, node_replace: io.NodeReplace) -> None:
+            """Register a node replacement mapping."""
+            from server import PromptServer
+            PromptServer.instance.node_replace_manager.register(node_replace)
+
     class Execution(ProxiedSingleton):
         async def set_progress(
             self,
@@ -73,8 +84,6 @@ class ComfyAPI_latest(ComfyAPIBase):
                 image=to_display,
             )
 
-    execution: Execution
-
 class ComfyExtension(ABC):
     async def on_load(self) -> None:
         """
diff --git a/comfy_api/latest/_input/video_types.py b/comfy_api/latest/_input/video_types.py
index e634a0311..451e9526e 100644
--- a/comfy_api/latest/_input/video_types.py
+++ b/comfy_api/latest/_input/video_types.py
@@ -34,6 +34,21 @@ class VideoInput(ABC):
         """
         pass
 
+    @abstractmethod
+    def as_trimmed(
+        self,
+        start_time: float | None = None,
+        duration: float | None = None,
+        strict_duration: bool = False,
+    ) -> VideoInput | None:
+        """
+        Create a new VideoInput which is trimmed to have the corresponding start_time and duration
+
+        Returns:
+            A new VideoInput, or None if the result would have negative duration
+        """
+        pass
+
     def get_stream_source(self) -> Union[str, io.BytesIO]:
         """
         Get a streamable source for the video. This allows processing without
diff --git a/comfy_api/latest/_input_impl/video_types.py b/comfy_api/latest/_input_impl/video_types.py
index 1405d0b81..3463ed1c9 100644
--- a/comfy_api/latest/_input_impl/video_types.py
+++ b/comfy_api/latest/_input_impl/video_types.py
@@ -6,6 +6,7 @@ from typing import Optional
 from .._input import AudioInput, VideoInput
 import av
 import io
+import itertools
 import json
 import numpy as np
 import math
@@ -29,7 +30,6 @@ def container_to_output_format(container_format: str | None) -> str | None:
     formats = container_format.split(",")
     return formats[0]
 
-
 def get_open_write_kwargs(
     dest: str | io.BytesIO, container_format: str, to_format: str | None
 ) -> dict:
@@ -57,12 +57,14 @@ class VideoFromFile(VideoInput):
     Class representing video input from a file.
     """
 
-    def __init__(self, file: str | io.BytesIO):
+    def __init__(self, file: str | io.BytesIO, *, start_time: float=0, duration: float=0):
         """
         Initialize the VideoFromFile object based off of either a path on disk or a BytesIO object
         containing the file contents.
         """
         self.__file = file
+        self.__start_time = start_time
+        self.__duration = duration
 
     def get_stream_source(self) -> str | io.BytesIO:
         """
@@ -96,6 +98,16 @@ class VideoFromFile(VideoInput):
         Returns:
             Duration in seconds
         """
+        raw_duration = self._get_raw_duration()
+        if self.__start_time < 0:
+            duration_from_start = min(raw_duration, -self.__start_time)
+        else:
+            duration_from_start = raw_duration - self.__start_time
+        if self.__duration:
+            return min(self.__duration, duration_from_start)
+        return duration_from_start
+
+    def _get_raw_duration(self) -> float:
         if isinstance(self.__file, io.BytesIO):
             self.__file.seek(0)
         with av.open(self.__file, mode="r") as container:
@@ -113,9 +125,13 @@ class VideoFromFile(VideoInput):
             if video_stream and video_stream.average_rate:
                 frame_count = 0
                 container.seek(0)
-                for packet in container.demux(video_stream):
-                    for _ in packet.decode():
-                        frame_count += 1
+                frame_iterator = (
+                    container.decode(video_stream)
+                    if video_stream.codec.capabilities & 0x100
+                    else container.demux(video_stream)
+                )
+                for packet in frame_iterator:
+                    frame_count += 1
                 if frame_count > 0:
                     return float(frame_count / video_stream.average_rate)
 
@@ -131,36 +147,54 @@ class VideoFromFile(VideoInput):
 
         with av.open(self.__file, mode="r") as container:
             video_stream = self._get_first_video_stream(container)
-            # 1. Prefer the frames field if available
-            if video_stream.frames and video_stream.frames > 0:
+            # 1. Prefer the frames field if available and usable
+            if (
+                video_stream.frames
+                and video_stream.frames > 0
+                and not self.__start_time
+                and not self.__duration
+            ):
                 return int(video_stream.frames)
 
             # 2. Try to estimate from duration and average_rate using only metadata
-            if container.duration is not None and video_stream.average_rate:
-                duration_seconds = float(container.duration / av.time_base)
-                estimated_frames = int(round(duration_seconds * float(video_stream.average_rate)))
-                if estimated_frames > 0:
-                    return estimated_frames
-
             if (
                 getattr(video_stream, "duration", None) is not None
                 and getattr(video_stream, "time_base", None) is not None
                 and video_stream.average_rate
             ):
-                duration_seconds = float(video_stream.duration * video_stream.time_base)
+                raw_duration = float(video_stream.duration * video_stream.time_base)
+                if self.__start_time < 0:
+                    duration_from_start = min(raw_duration, -self.__start_time)
+                else:
+                    duration_from_start = raw_duration - self.__start_time
+                duration_seconds = min(self.__duration, duration_from_start)
                 estimated_frames = int(round(duration_seconds * float(video_stream.average_rate)))
                 if estimated_frames > 0:
                     return estimated_frames
 
             # 3. Last resort: decode frames and count them (streaming)
-            frame_count = 0
-            container.seek(0)
-            for packet in container.demux(video_stream):
-                for _ in packet.decode():
-                    frame_count += 1
-
-            if frame_count == 0:
-                raise ValueError(f"Could not determine frame count for file '{self.__file}'")
+            if self.__start_time < 0:
+                start_time = max(self._get_raw_duration() + self.__start_time, 0)
+            else:
+                start_time = self.__start_time
+            frame_count = 1
+            start_pts = int(start_time / video_stream.time_base)
+            end_pts = int((start_time + self.__duration) / video_stream.time_base)
+            container.seek(start_pts, stream=video_stream)
+            frame_iterator = (
+                container.decode(video_stream)
+                if video_stream.codec.capabilities & 0x100
+                else container.demux(video_stream)
+            )
+            for frame in frame_iterator:
+                if frame.pts >= start_pts:
+                    break
+            else:
+                raise ValueError(f"Could not determine frame count for file '{self.__file}'\nNo frames exist for start_time {self.__start_time}")
+            for frame in frame_iterator:
+                if frame.pts >= end_pts:
+                    break
+                frame_count += 1
             return frame_count
 
     def get_frame_rate(self) -> Fraction:
@@ -199,9 +233,21 @@ class VideoFromFile(VideoInput):
             return container.format.name
 
     def get_components_internal(self, container: InputContainer) -> VideoComponents:
+        video_stream = self._get_first_video_stream(container)
+        if self.__start_time < 0:
+            start_time = max(self._get_raw_duration() + self.__start_time, 0)
+        else:
+            start_time = self.__start_time
         # Get video frames
         frames = []
-        for frame in container.decode(video=0):
+        start_pts = int(start_time / video_stream.time_base)
+        end_pts = int((start_time + self.__duration) / video_stream.time_base)
+        container.seek(start_pts, stream=video_stream)
+        for frame in container.decode(video_stream):
+            if frame.pts < start_pts:
+                continue
+            if self.__duration and frame.pts >= end_pts:
+                break
             img = frame.to_ndarray(format='rgb24')  # shape: (H, W, 3)
             img = torch.from_numpy(img) / 255.0  # shape: (H, W, 3)
             frames.append(img)
@@ -209,31 +255,44 @@ class VideoFromFile(VideoInput):
         images = torch.stack(frames) if len(frames) > 0 else torch.zeros(0, 3, 0, 0)
 
         # Get frame rate
-        video_stream = next(s for s in container.streams if s.type == 'video')
-        frame_rate = Fraction(video_stream.average_rate) if video_stream and video_stream.average_rate else Fraction(1)
+        frame_rate = Fraction(video_stream.average_rate) if video_stream.average_rate else Fraction(1)
 
         # Get audio if available
         audio = None
-        try:
-            container.seek(0)  # Reset the container to the beginning
-            for stream in container.streams:
-                if stream.type != 'audio':
-                    continue
-                assert isinstance(stream, av.AudioStream)
-                audio_frames = []
-                for packet in container.demux(stream):
-                    for frame in packet.decode():
-                        assert isinstance(frame, av.AudioFrame)
-                        audio_frames.append(frame.to_ndarray())  # shape: (channels, samples)
-                if len(audio_frames) > 0:
-                    audio_data = np.concatenate(audio_frames, axis=1)  # shape: (channels, total_samples)
-                    audio_tensor = torch.from_numpy(audio_data).unsqueeze(0)  # shape: (1, channels, total_samples)
-                    audio = AudioInput({
-                        "waveform": audio_tensor,
-                        "sample_rate": int(stream.sample_rate) if stream.sample_rate else 1,
-                    })
-        except StopIteration:
-            pass  # No audio stream
+        container.seek(start_pts, stream=video_stream)
+        # Use last stream for consistency
+        if len(container.streams.audio):
+            audio_stream = container.streams.audio[-1]
+            audio_frames = []
+            resample = av.audio.resampler.AudioResampler(format='fltp').resample
+            frames = itertools.chain.from_iterable(
+                map(resample, container.decode(audio_stream))
+            )
+
+            has_first_frame = False
+            for frame in frames:
+                offset_seconds = start_time - frame.pts * audio_stream.time_base
+                to_skip = int(offset_seconds * audio_stream.sample_rate)
+                if to_skip < frame.samples:
+                    has_first_frame = True
+                    break
+            if has_first_frame:
+                audio_frames.append(frame.to_ndarray()[..., to_skip:])
+
+            for frame in frames:
+                if frame.time > start_time + self.__duration:
+                    break
+                audio_frames.append(frame.to_ndarray())  # shape: (channels, samples)
+            if len(audio_frames) > 0:
+                audio_data = np.concatenate(audio_frames, axis=1)  # shape: (channels, total_samples)
+                if self.__duration:
+                    audio_data = audio_data[..., :int(self.__duration * audio_stream.sample_rate)]
+
+                audio_tensor = torch.from_numpy(audio_data).unsqueeze(0)  # shape: (1, channels, total_samples)
+                audio = AudioInput({
+                    "waveform": audio_tensor,
+                    "sample_rate": int(audio_stream.sample_rate) if audio_stream.sample_rate else 1,
+                })
 
         metadata = container.metadata
         return VideoComponents(images=images, audio=audio, frame_rate=frame_rate, metadata=metadata)
@@ -250,7 +309,7 @@ class VideoFromFile(VideoInput):
         path: str | io.BytesIO,
         format: VideoContainer = VideoContainer.AUTO,
         codec: VideoCodec = VideoCodec.AUTO,
-        metadata: Optional[dict] = None
+        metadata: Optional[dict] = None,
     ):
         if isinstance(self.__file, io.BytesIO):
             self.__file.seek(0)  # Reset the BytesIO object to the beginning
@@ -262,15 +321,14 @@ class VideoFromFile(VideoInput):
                 reuse_streams = False
             if codec != VideoCodec.AUTO and codec != video_encoding and video_encoding is not None:
                 reuse_streams = False
+            if self.__start_time or self.__duration:
+                reuse_streams = False
 
             if not reuse_streams:
                 components = self.get_components_internal(container)
                 video = VideoFromComponents(components)
                 return video.save_to(
-                    path,
-                    format=format,
-                    codec=codec,
-                    metadata=metadata
+                    path, format=format, codec=codec, metadata=metadata
                 )
 
             streams = container.streams
@@ -304,10 +362,21 @@ class VideoFromFile(VideoInput):
                         output_container.mux(packet)
 
     def _get_first_video_stream(self, container: InputContainer):
-        video_stream = next((s for s in container.streams if s.type == "video"), None)
-        if video_stream is None:
-            raise ValueError(f"No video stream found in file '{self.__file}'")
-        return video_stream
+        if len(container.streams.video):
+            return container.streams.video[0]
+        raise ValueError(f"No video stream found in file '{self.__file}'")
+
+    def as_trimmed(
+        self, start_time: float = 0, duration: float = 0, strict_duration: bool = True
+    ) -> VideoInput | None:
+        trimmed = VideoFromFile(
+            self.get_stream_source(),
+            start_time=start_time + self.__start_time,
+            duration=duration,
+        )
+        if trimmed.get_duration() < duration and strict_duration:
+            return None
+        return trimmed
 
 
 class VideoFromComponents(VideoInput):
@@ -322,7 +391,7 @@ class VideoFromComponents(VideoInput):
         return VideoComponents(
             images=self.__components.images,
             audio=self.__components.audio,
-            frame_rate=self.__components.frame_rate
+            frame_rate=self.__components.frame_rate,
         )
 
     def save_to(
@@ -330,7 +399,7 @@ class VideoFromComponents(VideoInput):
         path: str,
         format: VideoContainer = VideoContainer.AUTO,
         codec: VideoCodec = VideoCodec.AUTO,
-        metadata: Optional[dict] = None
+        metadata: Optional[dict] = None,
     ):
         if format != VideoContainer.AUTO and format != VideoContainer.MP4:
             raise ValueError("Only MP4 format is supported for now")
@@ -357,7 +426,10 @@ class VideoFromComponents(VideoInput):
             audio_stream: Optional[av.AudioStream] = None
             if self.__components.audio:
                 audio_sample_rate = int(self.__components.audio['sample_rate'])
-                audio_stream = output.add_stream('aac', rate=audio_sample_rate)
+                waveform = self.__components.audio['waveform']
+                waveform = waveform[0, :, :math.ceil((audio_sample_rate / frame_rate) * self.__components.images.shape[0])]
+                layout = {1: 'mono', 2: 'stereo', 6: '5.1'}.get(waveform.shape[0], 'stereo')
+                audio_stream = output.add_stream('aac', rate=audio_sample_rate, layout=layout)
 
             # Encode video
             for i, frame in enumerate(self.__components.images):
@@ -372,12 +444,21 @@ class VideoFromComponents(VideoInput):
             output.mux(packet)
 
             if audio_stream and self.__components.audio:
-                waveform = self.__components.audio['waveform']
-                waveform = waveform[:, :, :math.ceil((audio_sample_rate / frame_rate) * self.__components.images.shape[0])]
-                frame = av.AudioFrame.from_ndarray(waveform.movedim(2, 1).reshape(1, -1).float().cpu().numpy(), format='flt', layout='mono' if waveform.shape[1] == 1 else 'stereo')
+                frame = av.AudioFrame.from_ndarray(waveform.float().cpu().numpy(), format='fltp', layout=layout)
                 frame.sample_rate = audio_sample_rate
                 frame.pts = 0
                 output.mux(audio_stream.encode(frame))
 
                 # Flush encoder
                 output.mux(audio_stream.encode(None))
+
+    def as_trimmed(
+        self,
+        start_time: float | None = None,
+        duration: float | None = None,
+        strict_duration: bool = True,
+    ) -> VideoInput | None:
+        if self.get_duration() < start_time + duration:
+            return None
+        #TODO Consider tracking duration and trimming at time of save?
+        return VideoFromFile(self.get_stream_source(), start_time=start_time, duration=duration)
diff --git a/comfy_api/latest/_io.py b/comfy_api/latest/_io.py
index 93cf482ca..95d79c035 100644
--- a/comfy_api/latest/_io.py
+++ b/comfy_api/latest/_io.py
@@ -2030,6 +2030,68 @@ class _UIOutput(ABC):
         ...
 
 
+class InputMapOldId(TypedDict):
+    """Map an old node input to a new node input by ID."""
+    new_id: str
+    old_id: str
+
+class InputMapSetValue(TypedDict):
+    """Set a specific value for a new node input."""
+    new_id: str
+    set_value: Any
+
+InputMap = InputMapOldId | InputMapSetValue
+"""
+Input mapping for node replacement. Type is inferred by dictionary keys:
+- {"new_id": str, "old_id": str} - maps old input to new input
+- {"new_id": str, "set_value": Any} - sets a specific value for new input
+"""
+
+class OutputMap(TypedDict):
+    """Map outputs of node replacement via indexes."""
+    new_idx: int
+    old_idx: int
+
+class NodeReplace:
+    """
+    Defines a possible node replacement, mapping inputs and outputs of the old node to the new node.
+
+    Also supports assigning specific values to the input widgets of the new node.
+
+    Args:
+        new_node_id: The class name of the new replacement node.
+        old_node_id: The class name of the deprecated node.
+        old_widget_ids: Ordered list of input IDs for widgets that may not have an input slot
+            connected. The workflow JSON stores widget values by their relative position index,
+            not by ID. This list maps those positional indexes to input IDs, enabling the
+            replacement system to correctly identify widget values during node migration.
+        input_mapping: List of input mappings from old node to new node.
+        output_mapping: List of output mappings from old node to new node.
+    """
+    def __init__(self,
+        new_node_id: str,
+        old_node_id: str,
+        old_widget_ids: list[str] | None=None,
+        input_mapping: list[InputMap] | None=None,
+        output_mapping: list[OutputMap] | None=None,
+    ):
+        self.new_node_id = new_node_id
+        self.old_node_id = old_node_id
+        self.old_widget_ids = old_widget_ids
+        self.input_mapping = input_mapping
+        self.output_mapping = output_mapping
+
+    def as_dict(self):
+        """Create serializable representation of the node replacement."""
+        return {
+            "new_node_id": self.new_node_id,
+            "old_node_id": self.old_node_id,
+            "old_widget_ids": self.old_widget_ids,
+            "input_mapping": list(self.input_mapping) if self.input_mapping else None,
+            "output_mapping": list(self.output_mapping) if self.output_mapping else None,
+        }
+
+
 __all__ = [
     "FolderType",
     "UploadType",
@@ -2121,4 +2183,5 @@ __all__ = [
     "ImageCompare",
     "PriceBadgeDepends",
     "PriceBadge",
+    "NodeReplace",
 ]
diff --git a/comfy_api_nodes/apis/__init__.py b/comfy_api_nodes/apis/__init__.py
index ee2aa1ce6..46a583b5e 100644
--- a/comfy_api_nodes/apis/__init__.py
+++ b/comfy_api_nodes/apis/__init__.py
@@ -1197,12 +1197,6 @@ class KlingImageGenImageReferenceType(str, Enum):
     face = 'face'
 
 
-class KlingImageGenModelName(str, Enum):
-    kling_v1 = 'kling-v1'
-    kling_v1_5 = 'kling-v1-5'
-    kling_v2 = 'kling-v2'
-
-
 class KlingImageGenerationsRequest(BaseModel):
     aspect_ratio: Optional[KlingImageGenAspectRatio] = '16:9'
     callback_url: Optional[AnyUrl] = Field(
@@ -1218,7 +1212,7 @@ class KlingImageGenerationsRequest(BaseModel):
         0.5, description='Reference intensity for user-uploaded images', ge=0.0, le=1.0
     )
     image_reference: Optional[KlingImageGenImageReferenceType] = None
-    model_name: Optional[KlingImageGenModelName] = 'kling-v1'
+    model_name: str = Field(...)
     n: Optional[int] = Field(1, description='Number of generated images', ge=1, le=9)
     negative_prompt: Optional[str] = Field(
         None, description='Negative text prompt', max_length=200
diff --git a/comfy_api_nodes/apis/bria.py b/comfy_api_nodes/apis/bria.py
index 9119cacc6..8c496b56c 100644
--- a/comfy_api_nodes/apis/bria.py
+++ b/comfy_api_nodes/apis/bria.py
@@ -45,17 +45,55 @@ class BriaEditImageRequest(BaseModel):
     )
 
 
+class BriaRemoveBackgroundRequest(BaseModel):
+    image: str = Field(...)
+    sync: bool = Field(False)
+    visual_input_content_moderation: bool = Field(
+        False, description="If true, returns 422 on input image moderation failure."
+    )
+    visual_output_content_moderation: bool = Field(
+        False, description="If true, returns 422 on visual output moderation failure."
+    )
+    seed: int = Field(...)
+
+
 class BriaStatusResponse(BaseModel):
     request_id: str = Field(...)
     status_url: str = Field(...)
     warning: str | None = Field(None)
 
 
-class BriaResult(BaseModel):
+class BriaRemoveBackgroundResult(BaseModel):
+    image_url: str = Field(...)
+
+
+class BriaRemoveBackgroundResponse(BaseModel):
+    status: str = Field(...)
+    result: BriaRemoveBackgroundResult | None = Field(None)
+
+
+class BriaImageEditResult(BaseModel):
     structured_prompt: str = Field(...)
     image_url: str = Field(...)
 
 
-class BriaResponse(BaseModel):
+class BriaImageEditResponse(BaseModel):
     status: str = Field(...)
-    result: BriaResult | None = Field(None)
+    result: BriaImageEditResult | None = Field(None)
+
+
+class BriaRemoveVideoBackgroundRequest(BaseModel):
+    video: str = Field(...)
+    background_color: str = Field(default="transparent", description="Background color for the output video.")
+    output_container_and_codec: str = Field(...)
+    preserve_audio: bool = Field(True)
+    seed: int = Field(...)
+
+
+class BriaRemoveVideoBackgroundResult(BaseModel):
+    video_url: str = Field(...)
+
+
+class BriaRemoveVideoBackgroundResponse(BaseModel):
+    status: str = Field(...)
+    result: BriaRemoveVideoBackgroundResult | None = Field(None)
diff --git a/comfy_api_nodes/apis/hunyuan3d.py b/comfy_api_nodes/apis/hunyuan3d.py
index 6421c9bd5..e84eba31e 100644
--- a/comfy_api_nodes/apis/hunyuan3d.py
+++ b/comfy_api_nodes/apis/hunyuan3d.py
@@ -64,3 +64,23 @@ class To3DProTaskResultResponse(BaseModel):
 
 class To3DProTaskQueryRequest(BaseModel):
     JobId: str = Field(...)
+
+
+class To3DUVFileInput(BaseModel):
+    Type: str = Field(..., description="File type: GLB, OBJ, or FBX")
+    Url: str = Field(...)
+
+
+class To3DUVTaskRequest(BaseModel):
+    File: To3DUVFileInput = Field(...)
+
+
+class TextureEditImageInfo(BaseModel):
+    Url: str = Field(...)
+
+
+class TextureEditTaskRequest(BaseModel):
+    File3D: To3DUVFileInput = Field(...)
+    Image: TextureEditImageInfo | None = Field(None)
+    Prompt: str | None = Field(None)
+    EnablePBR: bool | None = Field(None)
diff --git a/comfy_api_nodes/apis/kling.py b/comfy_api_nodes/apis/kling.py
index bf54ede3e..9c0446075 100644
--- a/comfy_api_nodes/apis/kling.py
+++ b/comfy_api_nodes/apis/kling.py
@@ -1,12 +1,22 @@
 from pydantic import BaseModel, Field
 
 
+class MultiPromptEntry(BaseModel):
+    index: int = Field(...)
+    prompt: str = Field(...)
+    duration: str = Field(...)
+
+
 class OmniProText2VideoRequest(BaseModel):
     model_name: str = Field(..., description="kling-video-o1")
     aspect_ratio: str = Field(..., description="'16:9', '9:16' or '1:1'")
     duration: str = Field(..., description="'5' or '10'")
     prompt: str = Field(...)
     mode: str = Field("pro")
+    multi_shot: bool | None = Field(None)
+    multi_prompt: list[MultiPromptEntry] | None = Field(None)
+    shot_type: str | None = Field(None)
+    sound: str = Field(..., description="'on' or 'off'")
 
 
 class OmniParamImage(BaseModel):
@@ -26,6 +36,10 @@ class OmniProFirstLastFrameRequest(BaseModel):
     duration: str = Field(..., description="'5' or '10'")
     prompt: str = Field(...)
     mode: str = Field("pro")
+    sound: str | None = Field(None, description="'on' or 'off'")
+    multi_shot: bool | None = Field(None)
+    multi_prompt: list[MultiPromptEntry] | None = Field(None)
+    shot_type: str | None = Field(None)
 
 
 class OmniProReferences2VideoRequest(BaseModel):
@@ -38,6 +52,10 @@ class OmniProReferences2VideoRequest(BaseModel):
     duration: str | None = Field(..., description="From 3 to 10.")
     prompt: str = Field(...)
     mode: str = Field("pro")
+    sound: str | None = Field(None, description="'on' or 'off'")
+    multi_shot: bool | None = Field(None)
+    multi_prompt: list[MultiPromptEntry] | None = Field(None)
+    shot_type: str | None = Field(None)
 
 
 class TaskStatusVideoResult(BaseModel):
@@ -54,6 +72,7 @@ class TaskStatusImageResult(BaseModel):
 class TaskStatusResults(BaseModel):
     videos: list[TaskStatusVideoResult] | None = Field(None)
     images: list[TaskStatusImageResult] | None = Field(None)
+    series_images: list[TaskStatusImageResult] | None = Field(None)
 
 
 class TaskStatusResponseData(BaseModel):
@@ -77,31 +96,42 @@ class OmniImageParamImage(BaseModel):
 
 
 class OmniProImageRequest(BaseModel):
-    model_name: str = Field(..., description="kling-image-o1")
-    resolution: str = Field(..., description="'1k' or '2k'")
+    model_name: str = Field(...)
+    resolution: str = Field(...)
     aspect_ratio: str | None = Field(...)
     prompt: str = Field(...)
     mode: str = Field("pro")
     n: int | None = Field(1, le=9)
     image_list: list[OmniImageParamImage] | None = Field(..., max_length=10)
+    result_type: str | None = Field(None, description="Set to 'series' for series generation")
+    series_amount: int | None = Field(None, ge=2, le=9, description="Number of images in a series")
 
 
 class TextToVideoWithAudioRequest(BaseModel):
-    model_name: str = Field(..., description="kling-v2-6")
+    model_name: str = Field(...)
     aspect_ratio: str = Field(..., description="'16:9', '9:16' or '1:1'")
-    duration: str = Field(..., description="'5' or '10'")
-    prompt: str = Field(...)
+    duration: str = Field(...)
+    prompt: str | None = Field(...)
+    negative_prompt: str | None = Field(None)
     mode: str = Field("pro")
     sound: str = Field(..., description="'on' or 'off'")
+    multi_shot: bool | None = Field(None)
+    multi_prompt: list[MultiPromptEntry] | None = Field(None)
+    shot_type: str | None = Field(None)
 
 
 class ImageToVideoWithAudioRequest(BaseModel):
-    model_name: str = Field(..., description="kling-v2-6")
+    model_name: str = Field(...)
     image: str = Field(...)
-    duration: str = Field(..., description="'5' or '10'")
-    prompt: str = Field(...)
+    image_tail: str | None = Field(None)
+    duration: str = Field(...)
+    prompt: str | None = Field(...)
+    negative_prompt: str | None = Field(None)
     mode: str = Field("pro")
     sound: str = Field(..., description="'on' or 'off'")
+    multi_shot: bool | None = Field(None)
+    multi_prompt: list[MultiPromptEntry] | None = Field(None)
+    shot_type: str | None = Field(None)
 
 
 class MotionControlRequest(BaseModel):
diff --git a/comfy_api_nodes/nodes_bria.py b/comfy_api_nodes/nodes_bria.py
index d3a52bc1b..4044ee3ea 100644
--- a/comfy_api_nodes/nodes_bria.py
+++ b/comfy_api_nodes/nodes_bria.py
@@ -3,7 +3,11 @@ from typing_extensions import override
 from comfy_api.latest import IO, ComfyExtension, Input
 from comfy_api_nodes.apis.bria import (
     BriaEditImageRequest,
-    BriaResponse,
+    BriaRemoveBackgroundRequest,
+    BriaRemoveBackgroundResponse,
+    BriaRemoveVideoBackgroundRequest,
+    BriaRemoveVideoBackgroundResponse,
+    BriaImageEditResponse,
     BriaStatusResponse,
     InputModerationSettings,
 )
@@ -11,10 +15,12 @@ from comfy_api_nodes.util import (
     ApiEndpoint,
     convert_mask_to_image,
     download_url_to_image_tensor,
-    get_number_of_images,
+    download_url_to_video_output,
     poll_op,
     sync_op,
-    upload_images_to_comfyapi,
+    upload_image_to_comfyapi,
+    upload_video_to_comfyapi,
+    validate_video_duration,
 )
 
 
@@ -73,21 +79,15 @@ class BriaImageEditNode(IO.ComfyNode):
                 IO.DynamicCombo.Input(
                     "moderation",
                     options=[
+                        IO.DynamicCombo.Option("false", []),
                         IO.DynamicCombo.Option(
                             "true",
                             [
-                                IO.Boolean.Input(
-                                    "prompt_content_moderation", default=False
-                                ),
-                                IO.Boolean.Input(
-                                    "visual_input_moderation", default=False
-                                ),
-                                IO.Boolean.Input(
-                                    "visual_output_moderation", default=True
-                                ),
+                                IO.Boolean.Input("prompt_content_moderation", default=False),
+                                IO.Boolean.Input("visual_input_moderation", default=False),
+                                IO.Boolean.Input("visual_output_moderation", default=True),
                             ],
                         ),
-                        IO.DynamicCombo.Option("false", []),
                     ],
                     tooltip="Moderation settings",
                 ),
@@ -127,50 +127,26 @@ class BriaImageEditNode(IO.ComfyNode):
         mask: Input.Image | None = None,
     ) -> IO.NodeOutput:
         if not prompt and not structured_prompt:
-            raise ValueError(
-                "One of prompt or structured_prompt is required to be non-empty."
-            )
-        if get_number_of_images(image) != 1:
-            raise ValueError("Exactly one input image is required.")
+            raise ValueError("One of prompt or structured_prompt is required to be non-empty.")
         mask_url = None
         if mask is not None:
-            mask_url = (
-                await upload_images_to_comfyapi(
-                    cls,
-                    convert_mask_to_image(mask),
-                    max_images=1,
-                    mime_type="image/png",
-                    wait_label="Uploading mask",
-                )
-            )[0]
+            mask_url = await upload_image_to_comfyapi(cls, convert_mask_to_image(mask), wait_label="Uploading mask")
         response = await sync_op(
             cls,
             ApiEndpoint(path="proxy/bria/v2/image/edit", method="POST"),
             data=BriaEditImageRequest(
                 instruction=prompt if prompt else None,
                 structured_instruction=structured_prompt if structured_prompt else None,
-                images=await upload_images_to_comfyapi(
-                    cls,
-                    image,
-                    max_images=1,
-                    mime_type="image/png",
-                    wait_label="Uploading image",
-                ),
+                images=[await upload_image_to_comfyapi(cls, image, wait_label="Uploading image")],
                 mask=mask_url,
                 negative_prompt=negative_prompt if negative_prompt else None,
                 guidance_scale=guidance_scale,
                 seed=seed,
                 model_version=model,
                 steps_num=steps,
-                prompt_content_moderation=moderation.get(
-                    "prompt_content_moderation", False
-                ),
-                visual_input_content_moderation=moderation.get(
-                    "visual_input_moderation", False
-                ),
-                visual_output_content_moderation=moderation.get(
-                    "visual_output_moderation", False
-                ),
+                prompt_content_moderation=moderation.get("prompt_content_moderation", False),
+                visual_input_content_moderation=moderation.get("visual_input_moderation", False),
+                visual_output_content_moderation=moderation.get("visual_output_moderation", False),
             ),
             response_model=BriaStatusResponse,
         )
@@ -178,7 +154,7 @@ class BriaImageEditNode(IO.ComfyNode):
             cls,
             ApiEndpoint(path=f"/proxy/bria/v2/status/{response.request_id}"),
             status_extractor=lambda r: r.status,
-            response_model=BriaResponse,
+            response_model=BriaImageEditResponse,
         )
         return IO.NodeOutput(
             await download_url_to_image_tensor(response.result.image_url),
@@ -186,11 +162,167 @@ class BriaImageEditNode(IO.ComfyNode):
         )
 
 
+class BriaRemoveImageBackground(IO.ComfyNode):
+
+    @classmethod
+    def define_schema(cls):
+        return IO.Schema(
+            node_id="BriaRemoveImageBackground",
+            display_name="Bria Remove Image Background",
+            category="api node/image/Bria",
+            description="Remove the background from an image using Bria RMBG 2.0.",
+            inputs=[
+                IO.Image.Input("image"),
+                IO.DynamicCombo.Input(
+                    "moderation",
+                    options=[
+                        IO.DynamicCombo.Option("false", []),
+                        IO.DynamicCombo.Option(
+                            "true",
+                            [
+                                IO.Boolean.Input("visual_input_moderation", default=False),
+                                IO.Boolean.Input("visual_output_moderation", default=True),
+                            ],
+                        ),
+                    ],
+                    tooltip="Moderation settings",
+                ),
+                IO.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=2147483647,
+                    display_mode=IO.NumberDisplay.number,
+                    control_after_generate=True,
+                    tooltip="Seed controls whether the node should re-run; "
+                    "results are non-deterministic regardless of seed.",
+                ),
+            ],
+            outputs=[IO.Image.Output()],
+            hidden=[
+                IO.Hidden.auth_token_comfy_org,
+                IO.Hidden.api_key_comfy_org,
+                IO.Hidden.unique_id,
+            ],
+            is_api_node=True,
+            price_badge=IO.PriceBadge(
+                expr="""{"type":"usd","usd":0.018}""",
+            ),
+        )
+
+    @classmethod
+    async def execute(
+        cls,
+        image: Input.Image,
+        moderation: dict,
+        seed: int,
+    ) -> IO.NodeOutput:
+        response = await sync_op(
+            cls,
+            ApiEndpoint(path="/proxy/bria/v2/image/edit/remove_background", method="POST"),
+            data=BriaRemoveBackgroundRequest(
+                image=await upload_image_to_comfyapi(cls, image, wait_label="Uploading image"),
+                sync=False,
+                visual_input_content_moderation=moderation.get("visual_input_moderation", False),
+                visual_output_content_moderation=moderation.get("visual_output_moderation", False),
+                seed=seed,
+            ),
+            response_model=BriaStatusResponse,
+        )
+        response = await poll_op(
+            cls,
+            ApiEndpoint(path=f"/proxy/bria/v2/status/{response.request_id}"),
+            status_extractor=lambda r: r.status,
+            response_model=BriaRemoveBackgroundResponse,
+        )
+        return IO.NodeOutput(await download_url_to_image_tensor(response.result.image_url))
+
+
+class BriaRemoveVideoBackground(IO.ComfyNode):
+
+    @classmethod
+    def define_schema(cls):
+        return IO.Schema(
+            node_id="BriaRemoveVideoBackground",
+            display_name="Bria Remove Video Background",
+            category="api node/video/Bria",
+            description="Remove the background from a video using Bria. ",
+            inputs=[
+                IO.Video.Input("video"),
+                IO.Combo.Input(
+                    "background_color",
+                    options=[
+                        "Black",
+                        "White",
+                        "Gray",
+                        "Red",
+                        "Green",
+                        "Blue",
+                        "Yellow",
+                        "Cyan",
+                        "Magenta",
+                        "Orange",
+                    ],
+                    tooltip="Background color for the output video.",
+                ),
+                IO.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=2147483647,
+                    display_mode=IO.NumberDisplay.number,
+                    control_after_generate=True,
+                    tooltip="Seed controls whether the node should re-run; "
+                    "results are non-deterministic regardless of seed.",
+                ),
+            ],
+            outputs=[IO.Video.Output()],
+            hidden=[
+                IO.Hidden.auth_token_comfy_org,
+                IO.Hidden.api_key_comfy_org,
+                IO.Hidden.unique_id,
+            ],
+            is_api_node=True,
+            price_badge=IO.PriceBadge(
+                expr="""{"type":"usd","usd":0.14,"format":{"suffix":"/second"}}""",
+            ),
+        )
+
+    @classmethod
+    async def execute(
+        cls,
+        video: Input.Video,
+        background_color: str,
+        seed: int,
+    ) -> IO.NodeOutput:
+        validate_video_duration(video, max_duration=60.0)
+        response = await sync_op(
+            cls,
+            ApiEndpoint(path="/proxy/bria/v2/video/edit/remove_background", method="POST"),
+            data=BriaRemoveVideoBackgroundRequest(
+                video=await upload_video_to_comfyapi(cls, video),
+                background_color=background_color,
+                output_container_and_codec="mp4_h264",
+                seed=seed,
+            ),
+            response_model=BriaStatusResponse,
+        )
+        response = await poll_op(
+            cls,
+            ApiEndpoint(path=f"/proxy/bria/v2/status/{response.request_id}"),
+            status_extractor=lambda r: r.status,
+            response_model=BriaRemoveVideoBackgroundResponse,
+        )
+        return IO.NodeOutput(await download_url_to_video_output(response.result.video_url))
+
+
 class BriaExtension(ComfyExtension):
     @override
     async def get_node_list(self) -> list[type[IO.ComfyNode]]:
         return [
             BriaImageEditNode,
+            BriaRemoveImageBackground,
+            BriaRemoveVideoBackground,
         ]
 
 
diff --git a/comfy_api_nodes/nodes_hunyuan3d.py b/comfy_api_nodes/nodes_hunyuan3d.py
index 813a7c809..ca002cc60 100644
--- a/comfy_api_nodes/nodes_hunyuan3d.py
+++ b/comfy_api_nodes/nodes_hunyuan3d.py
@@ -1,31 +1,48 @@
 from typing_extensions import override
 
-from comfy_api.latest import IO, ComfyExtension, Input
+from comfy_api.latest import IO, ComfyExtension, Input, Types
 from comfy_api_nodes.apis.hunyuan3d import (
     Hunyuan3DViewImage,
     InputGenerateType,
     ResultFile3D,
+    TextureEditTaskRequest,
     To3DProTaskCreateResponse,
     To3DProTaskQueryRequest,
     To3DProTaskRequest,
     To3DProTaskResultResponse,
+    To3DUVFileInput,
+    To3DUVTaskRequest,
 )
 from comfy_api_nodes.util import (
     ApiEndpoint,
     download_url_to_file_3d,
+    download_url_to_image_tensor,
     downscale_image_tensor_by_max_side,
     poll_op,
     sync_op,
+    upload_3d_model_to_comfyapi,
     upload_image_to_comfyapi,
     validate_image_dimensions,
     validate_string,
 )
 
 
-def get_file_from_response(response_objs: list[ResultFile3D], file_type: str) -> ResultFile3D | None:
+def _is_tencent_rate_limited(status: int, body: object) -> bool:
+    return (
+        status == 400
+        and isinstance(body, dict)
+        and "RequestLimitExceeded" in str(body.get("Response", {}).get("Error", {}).get("Code", ""))
+    )
+
+
+def get_file_from_response(
+    response_objs: list[ResultFile3D], file_type: str, raise_if_not_found: bool = True
+) -> ResultFile3D | None:
     for i in response_objs:
         if i.Type.lower() == file_type.lower():
             return i
+    if raise_if_not_found:
+        raise ValueError(f"'{file_type}' file type is not found in the response.")
     return None
 
 
@@ -35,7 +52,7 @@ class TencentTextToModelNode(IO.ComfyNode):
     def define_schema(cls):
         return IO.Schema(
             node_id="TencentTextToModelNode",
-            display_name="Hunyuan3D: Text to Model (Pro)",
+            display_name="Hunyuan3D: Text to Model",
             category="api node/3d/Tencent",
             inputs=[
                 IO.Combo.Input(
@@ -120,6 +137,7 @@ class TencentTextToModelNode(IO.ComfyNode):
                 EnablePBR=generate_type.get("pbr", None),
                 PolygonType=generate_type.get("polygon_type", None),
             ),
+            is_rate_limited=_is_tencent_rate_limited,
         )
         if response.Error:
             raise ValueError(f"Task creation failed with code {response.Error.Code}: {response.Error.Message}")
@@ -131,11 +149,14 @@ class TencentTextToModelNode(IO.ComfyNode):
             response_model=To3DProTaskResultResponse,
             status_extractor=lambda r: r.Status,
         )
-        glb_result = get_file_from_response(result.ResultFile3Ds, "glb")
-        obj_result = get_file_from_response(result.ResultFile3Ds, "obj")
-        file_glb = await download_url_to_file_3d(glb_result.Url, "glb", task_id=task_id) if glb_result else None
         return IO.NodeOutput(
-            file_glb, file_glb, await download_url_to_file_3d(obj_result.Url, "obj", task_id=task_id) if obj_result else None
+            f"{task_id}.glb",
+            await download_url_to_file_3d(
+                get_file_from_response(result.ResultFile3Ds, "glb").Url, "glb", task_id=task_id
+            ),
+            await download_url_to_file_3d(
+                get_file_from_response(result.ResultFile3Ds, "obj").Url, "obj", task_id=task_id
+            ),
         )
 
 
@@ -145,7 +166,7 @@ class TencentImageToModelNode(IO.ComfyNode):
     def define_schema(cls):
         return IO.Schema(
             node_id="TencentImageToModelNode",
-            display_name="Hunyuan3D: Image(s) to Model (Pro)",
+            display_name="Hunyuan3D: Image(s) to Model",
             category="api node/3d/Tencent",
             inputs=[
                 IO.Combo.Input(
@@ -268,6 +289,7 @@ class TencentImageToModelNode(IO.ComfyNode):
                 EnablePBR=generate_type.get("pbr", None),
                 PolygonType=generate_type.get("polygon_type", None),
             ),
+            is_rate_limited=_is_tencent_rate_limited,
         )
         if response.Error:
             raise ValueError(f"Task creation failed with code {response.Error.Code}: {response.Error.Message}")
@@ -279,11 +301,257 @@ class TencentImageToModelNode(IO.ComfyNode):
             response_model=To3DProTaskResultResponse,
             status_extractor=lambda r: r.Status,
         )
-        glb_result = get_file_from_response(result.ResultFile3Ds, "glb")
-        obj_result = get_file_from_response(result.ResultFile3Ds, "obj")
-        file_glb = await download_url_to_file_3d(glb_result.Url, "glb", task_id=task_id) if glb_result else None
         return IO.NodeOutput(
-            file_glb, file_glb, await download_url_to_file_3d(obj_result.Url, "obj", task_id=task_id) if obj_result else None
+            f"{task_id}.glb",
+            await download_url_to_file_3d(
+                get_file_from_response(result.ResultFile3Ds, "glb").Url, "glb", task_id=task_id
+            ),
+            await download_url_to_file_3d(
+                get_file_from_response(result.ResultFile3Ds, "obj").Url, "obj", task_id=task_id
+            ),
+        )
+
+
+class TencentModelTo3DUVNode(IO.ComfyNode):
+
+    @classmethod
+    def define_schema(cls):
+        return IO.Schema(
+            node_id="TencentModelTo3DUVNode",
+            display_name="Hunyuan3D: Model to UV",
+            category="api node/3d/Tencent",
+            description="Perform UV unfolding on a 3D model to generate UV texture. "
+            "Input model must have less than 30000 faces.",
+            inputs=[
+                IO.MultiType.Input(
+                    "model_3d",
+                    types=[IO.File3DGLB, IO.File3DOBJ, IO.File3DFBX, IO.File3DAny],
+                    tooltip="Input 3D model (GLB, OBJ, or FBX)",
+                ),
+                IO.Int.Input(
+                    "seed",
+                    default=1,
+                    min=0,
+                    max=2147483647,
+                    display_mode=IO.NumberDisplay.number,
+                    control_after_generate=True,
+                    tooltip="Seed controls whether the node should re-run; "
+                    "results are non-deterministic regardless of seed.",
+                ),
+            ],
+            outputs=[
+                IO.File3DOBJ.Output(display_name="OBJ"),
+                IO.File3DFBX.Output(display_name="FBX"),
+                IO.Image.Output(),
+            ],
+            hidden=[
+                IO.Hidden.auth_token_comfy_org,
+                IO.Hidden.api_key_comfy_org,
+                IO.Hidden.unique_id,
+            ],
+            is_api_node=True,
+            price_badge=IO.PriceBadge(expr='{"type":"usd","usd":0.2}'),
+        )
+
+    SUPPORTED_FORMATS = {"glb", "obj", "fbx"}
+
+    @classmethod
+    async def execute(
+        cls,
+        model_3d: Types.File3D,
+        seed: int,
+    ) -> IO.NodeOutput:
+        _ = seed
+        file_format = model_3d.format.lower()
+        if file_format not in cls.SUPPORTED_FORMATS:
+            raise ValueError(
+                f"Unsupported file format: '{file_format}'. "
+                f"Supported formats: {', '.join(sorted(cls.SUPPORTED_FORMATS))}."
+            )
+        response = await sync_op(
+            cls,
+            ApiEndpoint(path="/proxy/tencent/hunyuan/3d-uv", method="POST"),
+            response_model=To3DProTaskCreateResponse,
+            data=To3DUVTaskRequest(
+                File=To3DUVFileInput(
+                    Type=file_format.upper(),
+                    Url=await upload_3d_model_to_comfyapi(cls, model_3d, file_format),
+                )
+            ),
+            is_rate_limited=_is_tencent_rate_limited,
+        )
+        if response.Error:
+            raise ValueError(f"Task creation failed with code {response.Error.Code}: {response.Error.Message}")
+        result = await poll_op(
+            cls,
+            ApiEndpoint(path="/proxy/tencent/hunyuan/3d-uv/query", method="POST"),
+            data=To3DProTaskQueryRequest(JobId=response.JobId),
+            response_model=To3DProTaskResultResponse,
+            status_extractor=lambda r: r.Status,
+        )
+        return IO.NodeOutput(
+            await download_url_to_file_3d(get_file_from_response(result.ResultFile3Ds, "obj").Url, "obj"),
+            await download_url_to_file_3d(get_file_from_response(result.ResultFile3Ds, "fbx").Url, "fbx"),
+            await download_url_to_image_tensor(get_file_from_response(result.ResultFile3Ds, "image").Url),
+        )
+
+
+class Tencent3DTextureEditNode(IO.ComfyNode):
+
+    @classmethod
+    def define_schema(cls):
+        return IO.Schema(
+            node_id="Tencent3DTextureEditNode",
+            display_name="Hunyuan3D: 3D Texture Edit",
+            category="api node/3d/Tencent",
+            description="After inputting the 3D model, perform 3D model texture redrawing.",
+            inputs=[
+                IO.MultiType.Input(
+                    "model_3d",
+                    types=[IO.File3DFBX, IO.File3DAny],
+                    tooltip="3D model in FBX format. Model should have less than 100000 faces.",
+                ),
+                IO.String.Input(
+                    "prompt",
+                    multiline=True,
+                    default="",
+                    tooltip="Describes texture editing. Supports up to 1024 UTF-8 characters.",
+                ),
+                IO.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=2147483647,
+                    display_mode=IO.NumberDisplay.number,
+                    control_after_generate=True,
+                    tooltip="Seed controls whether the node should re-run; "
+                    "results are non-deterministic regardless of seed.",
+                ),
+            ],
+            outputs=[
+                IO.File3DGLB.Output(display_name="GLB"),
+                IO.File3DFBX.Output(display_name="FBX"),
+            ],
+            hidden=[
+                IO.Hidden.auth_token_comfy_org,
+                IO.Hidden.api_key_comfy_org,
+                IO.Hidden.unique_id,
+            ],
+            is_api_node=True,
+            price_badge=IO.PriceBadge(
+                expr="""{"type":"usd","usd": 0.6}""",
+            ),
+        )
+
+    @classmethod
+    async def execute(
+        cls,
+        model_3d: Types.File3D,
+        prompt: str,
+        seed: int,
+    ) -> IO.NodeOutput:
+        _ = seed
+        file_format = model_3d.format.lower()
+        if file_format != "fbx":
+            raise ValueError(f"Unsupported file format: '{file_format}'. Only FBX format is supported.")
+        validate_string(prompt, field_name="prompt", min_length=1, max_length=1024)
+        model_url = await upload_3d_model_to_comfyapi(cls, model_3d, file_format)
+        response = await sync_op(
+            cls,
+            ApiEndpoint(path="/proxy/tencent/hunyuan/3d-texture-edit", method="POST"),
+            response_model=To3DProTaskCreateResponse,
+            data=TextureEditTaskRequest(
+                File3D=To3DUVFileInput(Type=file_format.upper(), Url=model_url),
+                Prompt=prompt,
+                EnablePBR=True,
+            ),
+            is_rate_limited=_is_tencent_rate_limited,
+        )
+        if response.Error:
+            raise ValueError(f"Task creation failed with code {response.Error.Code}: {response.Error.Message}")
+
+        result = await poll_op(
+            cls,
+            ApiEndpoint(path="/proxy/tencent/hunyuan/3d-texture-edit/query", method="POST"),
+            data=To3DProTaskQueryRequest(JobId=response.JobId),
+            response_model=To3DProTaskResultResponse,
+            status_extractor=lambda r: r.Status,
+        )
+        return IO.NodeOutput(
+            await download_url_to_file_3d(get_file_from_response(result.ResultFile3Ds, "glb").Url, "glb"),
+            await download_url_to_file_3d(get_file_from_response(result.ResultFile3Ds, "fbx").Url, "fbx"),
+        )
+
+
+class Tencent3DPartNode(IO.ComfyNode):
+
+    @classmethod
+    def define_schema(cls):
+        return IO.Schema(
+            node_id="Tencent3DPartNode",
+            display_name="Hunyuan3D: 3D Part",
+            category="api node/3d/Tencent",
+            description="Automatically perform component identification and generation based on the model structure.",
+            inputs=[
+                IO.MultiType.Input(
+                    "model_3d",
+                    types=[IO.File3DFBX, IO.File3DAny],
+                    tooltip="3D model in FBX format. Model should have less than 30000 faces.",
+                ),
+                IO.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=2147483647,
+                    display_mode=IO.NumberDisplay.number,
+                    control_after_generate=True,
+                    tooltip="Seed controls whether the node should re-run; "
+                    "results are non-deterministic regardless of seed.",
+                ),
+            ],
+            outputs=[
+                IO.File3DFBX.Output(display_name="FBX"),
+            ],
+            hidden=[
+                IO.Hidden.auth_token_comfy_org,
+                IO.Hidden.api_key_comfy_org,
+                IO.Hidden.unique_id,
+            ],
+            is_api_node=True,
+            price_badge=IO.PriceBadge(expr='{"type":"usd","usd":0.6}'),
+        )
+
+    @classmethod
+    async def execute(
+        cls,
+        model_3d: Types.File3D,
+        seed: int,
+    ) -> IO.NodeOutput:
+        _ = seed
+        file_format = model_3d.format.lower()
+        if file_format != "fbx":
+            raise ValueError(f"Unsupported file format: '{file_format}'. Only FBX format is supported.")
+        model_url = await upload_3d_model_to_comfyapi(cls, model_3d, file_format)
+        response = await sync_op(
+            cls,
+            ApiEndpoint(path="/proxy/tencent/hunyuan/3d-part", method="POST"),
+            response_model=To3DProTaskCreateResponse,
+            data=To3DUVTaskRequest(
+                File=To3DUVFileInput(Type=file_format.upper(), Url=model_url),
+            ),
+            is_rate_limited=_is_tencent_rate_limited,
+        )
+        if response.Error:
+            raise ValueError(f"Task creation failed with code {response.Error.Code}: {response.Error.Message}")
+        result = await poll_op(
+            cls,
+            ApiEndpoint(path="/proxy/tencent/hunyuan/3d-part/query", method="POST"),
+            data=To3DProTaskQueryRequest(JobId=response.JobId),
+            response_model=To3DProTaskResultResponse,
+            status_extractor=lambda r: r.Status,
+        )
+        return IO.NodeOutput(
+            await download_url_to_file_3d(get_file_from_response(result.ResultFile3Ds, "fbx").Url, "fbx"),
         )
 
 
@@ -293,6 +561,9 @@ class TencentHunyuan3DExtension(ComfyExtension):
         return [
             TencentTextToModelNode,
             TencentImageToModelNode,
+            # TencentModelTo3DUVNode,
+            # Tencent3DTextureEditNode,
+            Tencent3DPartNode,
         ]
 
 
diff --git a/comfy_api_nodes/nodes_kling.py b/comfy_api_nodes/nodes_kling.py
index 739fe1855..b89c85561 100644
--- a/comfy_api_nodes/nodes_kling.py
+++ b/comfy_api_nodes/nodes_kling.py
@@ -38,7 +38,6 @@ from comfy_api_nodes.apis import (
     KlingImageGenerationsRequest,
     KlingImageGenerationsResponse,
     KlingImageGenImageReferenceType,
-    KlingImageGenModelName,
     KlingImageGenAspectRatio,
     KlingVideoEffectsRequest,
     KlingVideoEffectsResponse,
@@ -52,6 +51,7 @@ from comfy_api_nodes.apis import (
 from comfy_api_nodes.apis.kling import (
     ImageToVideoWithAudioRequest,
     MotionControlRequest,
+    MultiPromptEntry,
     OmniImageParamImage,
     OmniParamImage,
     OmniParamVideo,
@@ -71,6 +71,7 @@ from comfy_api_nodes.util import (
     sync_op,
     tensor_to_base64_string,
     upload_audio_to_comfyapi,
+    upload_image_to_comfyapi,
     upload_images_to_comfyapi,
     upload_video_to_comfyapi,
     validate_image_aspect_ratio,
@@ -80,6 +81,31 @@ from comfy_api_nodes.util import (
     validate_video_duration,
 )
 
+
+def _generate_storyboard_inputs(count: int) -> list:
+    inputs = []
+    for i in range(1, count + 1):
+        inputs.extend(
+            [
+                IO.String.Input(
+                    f"storyboard_{i}_prompt",
+                    multiline=True,
+                    default="",
+                    tooltip=f"Prompt for storyboard segment {i}. Max 512 characters.",
+                ),
+                IO.Int.Input(
+                    f"storyboard_{i}_duration",
+                    default=4,
+                    min=1,
+                    max=15,
+                    display_mode=IO.NumberDisplay.slider,
+                    tooltip=f"Duration for storyboard segment {i} in seconds.",
+                ),
+            ]
+        )
+    return inputs
+
+
 KLING_API_VERSION = "v1"
 PATH_TEXT_TO_VIDEO = f"/proxy/kling/{KLING_API_VERSION}/videos/text2video"
 PATH_IMAGE_TO_VIDEO = f"/proxy/kling/{KLING_API_VERSION}/videos/image2video"
@@ -820,20 +846,48 @@ class OmniProTextToVideoNode(IO.ComfyNode):
     def define_schema(cls) -> IO.Schema:
         return IO.Schema(
             node_id="KlingOmniProTextToVideoNode",
-            display_name="Kling Omni Text to Video (Pro)",
+            display_name="Kling 3.0 Omni Text to Video",
             category="api node/video/Kling",
             description="Use text prompts to generate videos with the latest Kling model.",
             inputs=[
-                IO.Combo.Input("model_name", options=["kling-video-o1"]),
+                IO.Combo.Input("model_name", options=["kling-v3-omni", "kling-video-o1"]),
                 IO.String.Input(
                     "prompt",
                     multiline=True,
                     tooltip="A text prompt describing the video content. "
-                    "This can include both positive and negative descriptions.",
+                    "This can include both positive and negative descriptions. "
+                    "Ignored when storyboards are enabled.",
                 ),
                 IO.Combo.Input("aspect_ratio", options=["16:9", "9:16", "1:1"]),
-                IO.Combo.Input("duration", options=[5, 10]),
+                IO.Int.Input("duration", default=5, min=3, max=15, display_mode=IO.NumberDisplay.slider),
                 IO.Combo.Input("resolution", options=["1080p", "720p"], optional=True),
+                IO.DynamicCombo.Input(
+                    "storyboards",
+                    options=[
+                        IO.DynamicCombo.Option("disabled", []),
+                        IO.DynamicCombo.Option("1 storyboard", _generate_storyboard_inputs(1)),
+                        IO.DynamicCombo.Option("2 storyboards", _generate_storyboard_inputs(2)),
+                        IO.DynamicCombo.Option("3 storyboards", _generate_storyboard_inputs(3)),
+                        IO.DynamicCombo.Option("4 storyboards", _generate_storyboard_inputs(4)),
+                        IO.DynamicCombo.Option("5 storyboards", _generate_storyboard_inputs(5)),
+                        IO.DynamicCombo.Option("6 storyboards", _generate_storyboard_inputs(6)),
+                    ],
+                    tooltip="Generate a series of video segments with individual prompts and durations. "
+                    "Ignored for o1 model.",
+                    optional=True,
+                ),
+                IO.Boolean.Input("generate_audio", default=False, optional=True),
+                IO.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=2147483647,
+                    display_mode=IO.NumberDisplay.number,
+                    control_after_generate=True,
+                    tooltip="Seed controls whether the node should re-run; "
+                    "results are non-deterministic regardless of seed.",
+                    optional=True,
+                ),
             ],
             outputs=[
                 IO.Video.Output(),
@@ -845,11 +899,15 @@ class OmniProTextToVideoNode(IO.ComfyNode):
             ],
             is_api_node=True,
             price_badge=IO.PriceBadge(
-                depends_on=IO.PriceBadgeDepends(widgets=["duration", "resolution"]),
+                depends_on=IO.PriceBadgeDepends(widgets=["duration", "resolution", "model_name", "generate_audio"]),
                 expr="""
                 (
                   $mode := (widgets.resolution = "720p") ? "std" : "pro";
-                  $rates := {"std": 0.084, "pro": 0.112};
+                  $isV3 := $contains(widgets.model_name, "v3");
+                  $audio := $isV3 and widgets.generate_audio;
+                  $rates := $audio
+                    ? {"std": 0.112, "pro": 0.14}
+                    : {"std": 0.084, "pro": 0.112};
                   {"type":"usd","usd": $lookup($rates, $mode) * widgets.duration}
                 )
                 """,
@@ -864,8 +922,45 @@ class OmniProTextToVideoNode(IO.ComfyNode):
         aspect_ratio: str,
         duration: int,
         resolution: str = "1080p",
+        storyboards: dict | None = None,
+        generate_audio: bool = False,
+        seed: int = 0,
     ) -> IO.NodeOutput:
-        validate_string(prompt, min_length=1, max_length=2500)
+        _ = seed
+        if model_name == "kling-video-o1":
+            if duration not in (5, 10):
+                raise ValueError("kling-video-o1 only supports durations of 5 or 10 seconds.")
+            if generate_audio:
+                raise ValueError("kling-video-o1 does not support audio generation.")
+        stories_enabled = storyboards is not None and storyboards["storyboards"] != "disabled"
+        if stories_enabled and model_name == "kling-video-o1":
+            raise ValueError("kling-video-o1 does not support storyboards.")
+        validate_string(prompt, strip_whitespace=True, min_length=0 if stories_enabled else 1, max_length=2500)
+
+        multi_shot = None
+        multi_prompt_list = None
+        if stories_enabled:
+            count = int(storyboards["storyboards"].split()[0])
+            multi_shot = True
+            multi_prompt_list = []
+            for i in range(1, count + 1):
+                sb_prompt = storyboards[f"storyboard_{i}_prompt"]
+                sb_duration = storyboards[f"storyboard_{i}_duration"]
+                validate_string(sb_prompt, field_name=f"storyboard_{i}_prompt", min_length=1, max_length=512)
+                multi_prompt_list.append(
+                    MultiPromptEntry(
+                        index=i,
+                        prompt=sb_prompt,
+                        duration=str(sb_duration),
+                    )
+                )
+            total_storyboard_duration = sum(int(e.duration) for e in multi_prompt_list)
+            if total_storyboard_duration != duration:
+                raise ValueError(
+                    f"Total storyboard duration ({total_storyboard_duration}s) "
+                    f"must equal the global duration ({duration}s)."
+                )
+
         response = await sync_op(
             cls,
             ApiEndpoint(path="/proxy/kling/v1/videos/omni-video", method="POST"),
@@ -876,6 +971,10 @@ class OmniProTextToVideoNode(IO.ComfyNode):
                 aspect_ratio=aspect_ratio,
                 duration=str(duration),
                 mode="pro" if resolution == "1080p" else "std",
+                multi_shot=multi_shot,
+                multi_prompt=multi_prompt_list,
+                shot_type="customize" if multi_shot else None,
+                sound="on" if generate_audio else "off",
             ),
         )
         return await finish_omni_video_task(cls, response)
@@ -887,24 +986,26 @@ class OmniProFirstLastFrameNode(IO.ComfyNode):
     def define_schema(cls) -> IO.Schema:
         return IO.Schema(
             node_id="KlingOmniProFirstLastFrameNode",
-            display_name="Kling Omni First-Last-Frame to Video (Pro)",
+            display_name="Kling 3.0 Omni First-Last-Frame to Video",
             category="api node/video/Kling",
             description="Use a start frame, an optional end frame, or reference images with the latest Kling model.",
             inputs=[
-                IO.Combo.Input("model_name", options=["kling-video-o1"]),
+                IO.Combo.Input("model_name", options=["kling-v3-omni", "kling-video-o1"]),
                 IO.String.Input(
                     "prompt",
                     multiline=True,
                     tooltip="A text prompt describing the video content. "
-                    "This can include both positive and negative descriptions.",
+                    "This can include both positive and negative descriptions. "
+                    "Ignored when storyboards are enabled.",
                 ),
-                IO.Int.Input("duration", default=5, min=3, max=10, display_mode=IO.NumberDisplay.slider),
+                IO.Int.Input("duration", default=5, min=3, max=15, display_mode=IO.NumberDisplay.slider),
                 IO.Image.Input("first_frame"),
                 IO.Image.Input(
                     "end_frame",
                     optional=True,
                     tooltip="An optional end frame for the video. "
-                    "This cannot be used simultaneously with 'reference_images'.",
+                    "This cannot be used simultaneously with 'reference_images'. "
+                    "Does not work with storyboards.",
                 ),
                 IO.Image.Input(
                     "reference_images",
@@ -912,6 +1013,38 @@ class OmniProFirstLastFrameNode(IO.ComfyNode):
                     tooltip="Up to 6 additional reference images.",
                 ),
                 IO.Combo.Input("resolution", options=["1080p", "720p"], optional=True),
+                IO.DynamicCombo.Input(
+                    "storyboards",
+                    options=[
+                        IO.DynamicCombo.Option("disabled", []),
+                        IO.DynamicCombo.Option("1 storyboard", _generate_storyboard_inputs(1)),
+                        IO.DynamicCombo.Option("2 storyboards", _generate_storyboard_inputs(2)),
+                        IO.DynamicCombo.Option("3 storyboards", _generate_storyboard_inputs(3)),
+                        IO.DynamicCombo.Option("4 storyboards", _generate_storyboard_inputs(4)),
+                        IO.DynamicCombo.Option("5 storyboards", _generate_storyboard_inputs(5)),
+                        IO.DynamicCombo.Option("6 storyboards", _generate_storyboard_inputs(6)),
+                    ],
+                    tooltip="Generate a series of video segments with individual prompts and durations. "
+                    "Only supported for kling-v3-omni.",
+                    optional=True,
+                ),
+                IO.Boolean.Input(
+                    "generate_audio",
+                    default=False,
+                    optional=True,
+                    tooltip="Generate audio for the video. Only supported for kling-v3-omni.",
+                ),
+                IO.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=2147483647,
+                    display_mode=IO.NumberDisplay.number,
+                    control_after_generate=True,
+                    tooltip="Seed controls whether the node should re-run; "
+                    "results are non-deterministic regardless of seed.",
+                    optional=True,
+                ),
             ],
             outputs=[
                 IO.Video.Output(),
@@ -923,11 +1056,15 @@ class OmniProFirstLastFrameNode(IO.ComfyNode):
             ],
             is_api_node=True,
             price_badge=IO.PriceBadge(
-                depends_on=IO.PriceBadgeDepends(widgets=["duration", "resolution"]),
+                depends_on=IO.PriceBadgeDepends(widgets=["duration", "resolution", "model_name", "generate_audio"]),
                 expr="""
                 (
                   $mode := (widgets.resolution = "720p") ? "std" : "pro";
-                  $rates := {"std": 0.084, "pro": 0.112};
+                  $isV3 := $contains(widgets.model_name, "v3");
+                  $audio := $isV3 and widgets.generate_audio;
+                  $rates := $audio
+                    ? {"std": 0.112, "pro": 0.14}
+                    : {"std": 0.084, "pro": 0.112};
                   {"type":"usd","usd": $lookup($rates, $mode) * widgets.duration}
                 )
                 """,
@@ -944,15 +1081,59 @@ class OmniProFirstLastFrameNode(IO.ComfyNode):
         end_frame: Input.Image | None = None,
         reference_images: Input.Image | None = None,
         resolution: str = "1080p",
+        storyboards: dict | None = None,
+        generate_audio: bool = False,
+        seed: int = 0,
     ) -> IO.NodeOutput:
+        _ = seed
+        if model_name == "kling-video-o1":
+            if duration > 10:
+                raise ValueError("kling-video-o1 does not support durations greater than 10 seconds.")
+            if generate_audio:
+                raise ValueError("kling-video-o1 does not support audio generation.")
+        stories_enabled = storyboards is not None and storyboards["storyboards"] != "disabled"
+        if stories_enabled and model_name == "kling-video-o1":
+            raise ValueError("kling-video-o1 does not support storyboards.")
         prompt = normalize_omni_prompt_references(prompt)
-        validate_string(prompt, min_length=1, max_length=2500)
+        validate_string(prompt, strip_whitespace=True, min_length=0 if stories_enabled else 1, max_length=2500)
         if end_frame is not None and reference_images is not None:
             raise ValueError("The 'end_frame' input cannot be used simultaneously with 'reference_images'.")
-        if duration not in (5, 10) and end_frame is None and reference_images is None:
+        if end_frame is not None and stories_enabled:
+            raise ValueError("The 'end_frame' input cannot be used simultaneously with storyboards.")
+        if (
+            model_name == "kling-video-o1"
+            and duration not in (5, 10)
+            and end_frame is None
+            and reference_images is None
+        ):
             raise ValueError(
                 "Duration is only supported for 5 or 10 seconds if there is no end frame or reference images."
             )
+
+        multi_shot = None
+        multi_prompt_list = None
+        if stories_enabled:
+            count = int(storyboards["storyboards"].split()[0])
+            multi_shot = True
+            multi_prompt_list = []
+            for i in range(1, count + 1):
+                sb_prompt = storyboards[f"storyboard_{i}_prompt"]
+                sb_duration = storyboards[f"storyboard_{i}_duration"]
+                validate_string(sb_prompt, field_name=f"storyboard_{i}_prompt", min_length=1, max_length=512)
+                multi_prompt_list.append(
+                    MultiPromptEntry(
+                        index=i,
+                        prompt=sb_prompt,
+                        duration=str(sb_duration),
+                    )
+                )
+            total_storyboard_duration = sum(int(e.duration) for e in multi_prompt_list)
+            if total_storyboard_duration != duration:
+                raise ValueError(
+                    f"Total storyboard duration ({total_storyboard_duration}s) "
+                    f"must equal the global duration ({duration}s)."
+                )
+
         validate_image_dimensions(first_frame, min_width=300, min_height=300)
         validate_image_aspect_ratio(first_frame, (1, 2.5), (2.5, 1))
         image_list: list[OmniParamImage] = [
@@ -988,6 +1169,10 @@ class OmniProFirstLastFrameNode(IO.ComfyNode):
                 duration=str(duration),
                 image_list=image_list,
                 mode="pro" if resolution == "1080p" else "std",
+                sound="on" if generate_audio else "off",
+                multi_shot=multi_shot,
+                multi_prompt=multi_prompt_list,
+                shot_type="customize" if multi_shot else None,
             ),
         )
         return await finish_omni_video_task(cls, response)
@@ -999,24 +1184,57 @@ class OmniProImageToVideoNode(IO.ComfyNode):
     def define_schema(cls) -> IO.Schema:
         return IO.Schema(
             node_id="KlingOmniProImageToVideoNode",
-            display_name="Kling Omni Image to Video (Pro)",
+            display_name="Kling 3.0 Omni Image to Video",
             category="api node/video/Kling",
             description="Use up to 7 reference images to generate a video with the latest Kling model.",
             inputs=[
-                IO.Combo.Input("model_name", options=["kling-video-o1"]),
+                IO.Combo.Input("model_name", options=["kling-v3-omni", "kling-video-o1"]),
                 IO.String.Input(
                     "prompt",
                     multiline=True,
                     tooltip="A text prompt describing the video content. "
-                    "This can include both positive and negative descriptions.",
+                    "This can include both positive and negative descriptions. "
+                    "Ignored when storyboards are enabled.",
                 ),
                 IO.Combo.Input("aspect_ratio", options=["16:9", "9:16", "1:1"]),
-                IO.Int.Input("duration", default=3, min=3, max=10, display_mode=IO.NumberDisplay.slider),
+                IO.Int.Input("duration", default=5, min=3, max=15, display_mode=IO.NumberDisplay.slider),
                 IO.Image.Input(
                     "reference_images",
                     tooltip="Up to 7 reference images.",
                 ),
                 IO.Combo.Input("resolution", options=["1080p", "720p"], optional=True),
+                IO.DynamicCombo.Input(
+                    "storyboards",
+                    options=[
+                        IO.DynamicCombo.Option("disabled", []),
+                        IO.DynamicCombo.Option("1 storyboard", _generate_storyboard_inputs(1)),
+                        IO.DynamicCombo.Option("2 storyboards", _generate_storyboard_inputs(2)),
+                        IO.DynamicCombo.Option("3 storyboards", _generate_storyboard_inputs(3)),
+                        IO.DynamicCombo.Option("4 storyboards", _generate_storyboard_inputs(4)),
+                        IO.DynamicCombo.Option("5 storyboards", _generate_storyboard_inputs(5)),
+                        IO.DynamicCombo.Option("6 storyboards", _generate_storyboard_inputs(6)),
+                    ],
+                    tooltip="Generate a series of video segments with individual prompts and durations. "
+                    "Only supported for kling-v3-omni.",
+                    optional=True,
+                ),
+                IO.Boolean.Input(
+                    "generate_audio",
+                    default=False,
+                    optional=True,
+                    tooltip="Generate audio for the video. Only supported for kling-v3-omni.",
+                ),
+                IO.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=2147483647,
+                    display_mode=IO.NumberDisplay.number,
+                    control_after_generate=True,
+                    tooltip="Seed controls whether the node should re-run; "
+                    "results are non-deterministic regardless of seed.",
+                    optional=True,
+                ),
             ],
             outputs=[
                 IO.Video.Output(),
@@ -1028,11 +1246,15 @@ class OmniProImageToVideoNode(IO.ComfyNode):
             ],
             is_api_node=True,
             price_badge=IO.PriceBadge(
-                depends_on=IO.PriceBadgeDepends(widgets=["duration", "resolution"]),
+                depends_on=IO.PriceBadgeDepends(widgets=["duration", "resolution", "model_name", "generate_audio"]),
                 expr="""
                 (
                   $mode := (widgets.resolution = "720p") ? "std" : "pro";
-                  $rates := {"std": 0.084, "pro": 0.112};
+                  $isV3 := $contains(widgets.model_name, "v3");
+                  $audio := $isV3 and widgets.generate_audio;
+                  $rates := $audio
+                    ? {"std": 0.112, "pro": 0.14}
+                    : {"std": 0.084, "pro": 0.112};
                   {"type":"usd","usd": $lookup($rates, $mode) * widgets.duration}
                 )
                 """,
@@ -1048,9 +1270,46 @@ class OmniProImageToVideoNode(IO.ComfyNode):
         duration: int,
         reference_images: Input.Image,
         resolution: str = "1080p",
+        storyboards: dict | None = None,
+        generate_audio: bool = False,
+        seed: int = 0,
     ) -> IO.NodeOutput:
+        _ = seed
+        if model_name == "kling-video-o1":
+            if duration > 10:
+                raise ValueError("kling-video-o1 does not support durations greater than 10 seconds.")
+            if generate_audio:
+                raise ValueError("kling-video-o1 does not support audio generation.")
+        stories_enabled = storyboards is not None and storyboards["storyboards"] != "disabled"
+        if stories_enabled and model_name == "kling-video-o1":
+            raise ValueError("kling-video-o1 does not support storyboards.")
         prompt = normalize_omni_prompt_references(prompt)
-        validate_string(prompt, min_length=1, max_length=2500)
+        validate_string(prompt, strip_whitespace=True, min_length=0 if stories_enabled else 1, max_length=2500)
+
+        multi_shot = None
+        multi_prompt_list = None
+        if stories_enabled:
+            count = int(storyboards["storyboards"].split()[0])
+            multi_shot = True
+            multi_prompt_list = []
+            for i in range(1, count + 1):
+                sb_prompt = storyboards[f"storyboard_{i}_prompt"]
+                sb_duration = storyboards[f"storyboard_{i}_duration"]
+                validate_string(sb_prompt, field_name=f"storyboard_{i}_prompt", min_length=1, max_length=512)
+                multi_prompt_list.append(
+                    MultiPromptEntry(
+                        index=i,
+                        prompt=sb_prompt,
+                        duration=str(sb_duration),
+                    )
+                )
+            total_storyboard_duration = sum(int(e.duration) for e in multi_prompt_list)
+            if total_storyboard_duration != duration:
+                raise ValueError(
+                    f"Total storyboard duration ({total_storyboard_duration}s) "
+                    f"must equal the global duration ({duration}s)."
+                )
+
         if get_number_of_images(reference_images) > 7:
             raise ValueError("The maximum number of reference images is 7.")
         for i in reference_images:
@@ -1070,6 +1329,10 @@ class OmniProImageToVideoNode(IO.ComfyNode):
                 duration=str(duration),
                 image_list=image_list,
                 mode="pro" if resolution == "1080p" else "std",
+                sound="on" if generate_audio else "off",
+                multi_shot=multi_shot,
+                multi_prompt=multi_prompt_list,
+                shot_type="customize" if multi_shot else None,
             ),
         )
         return await finish_omni_video_task(cls, response)
@@ -1081,11 +1344,11 @@ class OmniProVideoToVideoNode(IO.ComfyNode):
     def define_schema(cls) -> IO.Schema:
         return IO.Schema(
             node_id="KlingOmniProVideoToVideoNode",
-            display_name="Kling Omni Video to Video (Pro)",
+            display_name="Kling 3.0 Omni Video to Video",
             category="api node/video/Kling",
             description="Use a video and up to 4 reference images to generate a video with the latest Kling model.",
             inputs=[
-                IO.Combo.Input("model_name", options=["kling-video-o1"]),
+                IO.Combo.Input("model_name", options=["kling-v3-omni", "kling-video-o1"]),
                 IO.String.Input(
                     "prompt",
                     multiline=True,
@@ -1102,6 +1365,17 @@ class OmniProVideoToVideoNode(IO.ComfyNode):
                     optional=True,
                 ),
                 IO.Combo.Input("resolution", options=["1080p", "720p"], optional=True),
+                IO.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=2147483647,
+                    display_mode=IO.NumberDisplay.number,
+                    control_after_generate=True,
+                    tooltip="Seed controls whether the node should re-run; "
+                    "results are non-deterministic regardless of seed.",
+                    optional=True,
+                ),
             ],
             outputs=[
                 IO.Video.Output(),
@@ -1135,7 +1409,9 @@ class OmniProVideoToVideoNode(IO.ComfyNode):
         keep_original_sound: bool,
         reference_images: Input.Image | None = None,
         resolution: str = "1080p",
+        seed: int = 0,
     ) -> IO.NodeOutput:
+        _ = seed
         prompt = normalize_omni_prompt_references(prompt)
         validate_string(prompt, min_length=1, max_length=2500)
         validate_video_duration(reference_video, min_duration=3.0, max_duration=10.05)
@@ -1179,11 +1455,11 @@ class OmniProEditVideoNode(IO.ComfyNode):
     def define_schema(cls) -> IO.Schema:
         return IO.Schema(
             node_id="KlingOmniProEditVideoNode",
-            display_name="Kling Omni Edit Video (Pro)",
+            display_name="Kling 3.0 Omni Edit Video",
             category="api node/video/Kling",
             description="Edit an existing video with the latest model from Kling.",
             inputs=[
-                IO.Combo.Input("model_name", options=["kling-video-o1"]),
+                IO.Combo.Input("model_name", options=["kling-v3-omni", "kling-video-o1"]),
                 IO.String.Input(
                     "prompt",
                     multiline=True,
@@ -1198,6 +1474,17 @@ class OmniProEditVideoNode(IO.ComfyNode):
                     optional=True,
                 ),
                 IO.Combo.Input("resolution", options=["1080p", "720p"], optional=True),
+                IO.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=2147483647,
+                    display_mode=IO.NumberDisplay.number,
+                    control_after_generate=True,
+                    tooltip="Seed controls whether the node should re-run; "
+                    "results are non-deterministic regardless of seed.",
+                    optional=True,
+                ),
             ],
             outputs=[
                 IO.Video.Output(),
@@ -1229,7 +1516,9 @@ class OmniProEditVideoNode(IO.ComfyNode):
         keep_original_sound: bool,
         reference_images: Input.Image | None = None,
         resolution: str = "1080p",
+        seed: int = 0,
     ) -> IO.NodeOutput:
+        _ = seed
         prompt = normalize_omni_prompt_references(prompt)
         validate_string(prompt, min_length=1, max_length=2500)
         validate_video_duration(video, min_duration=3.0, max_duration=10.05)
@@ -1273,27 +1562,43 @@ class OmniProImageNode(IO.ComfyNode):
     def define_schema(cls) -> IO.Schema:
         return IO.Schema(
             node_id="KlingOmniProImageNode",
-            display_name="Kling Omni Image (Pro)",
+            display_name="Kling 3.0 Omni Image",
             category="api node/image/Kling",
             description="Create or edit images with the latest model from Kling.",
             inputs=[
-                IO.Combo.Input("model_name", options=["kling-image-o1"]),
+                IO.Combo.Input("model_name", options=["kling-v3-omni", "kling-image-o1"]),
                 IO.String.Input(
                     "prompt",
                     multiline=True,
                     tooltip="A text prompt describing the image content. "
                     "This can include both positive and negative descriptions.",
                 ),
-                IO.Combo.Input("resolution", options=["1K", "2K"]),
+                IO.Combo.Input("resolution", options=["1K", "2K", "4K"]),
                 IO.Combo.Input(
                     "aspect_ratio",
                     options=["16:9", "9:16", "1:1", "4:3", "3:4", "3:2", "2:3", "21:9"],
                 ),
+                IO.Combo.Input(
+                    "series_amount",
+                    options=["disabled", "2", "3", "4", "5", "6", "7", "8", "9"],
+                    tooltip="Generate a series of images. Not supported for kling-image-o1.",
+                ),
                 IO.Image.Input(
                     "reference_images",
                     tooltip="Up to 10 additional reference images.",
                     optional=True,
                 ),
+                IO.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=2147483647,
+                    display_mode=IO.NumberDisplay.number,
+                    control_after_generate=True,
+                    tooltip="Seed controls whether the node should re-run; "
+                    "results are non-deterministic regardless of seed.",
+                    optional=True,
+                ),
             ],
             outputs=[
                 IO.Image.Output(),
@@ -1305,7 +1610,16 @@ class OmniProImageNode(IO.ComfyNode):
             ],
             is_api_node=True,
             price_badge=IO.PriceBadge(
-                expr="""{"type":"usd","usd":0.028}""",
+                depends_on=IO.PriceBadgeDepends(widgets=["resolution", "series_amount", "model_name"]),
+                expr="""
+                (
+                  $prices := {"1k": 0.028, "2k": 0.028, "4k": 0.056};
+                  $base := $lookup($prices, widgets.resolution);
+                  $isO1 := widgets.model_name = "kling-image-o1";
+                  $mult := ($isO1 or widgets.series_amount = "disabled") ? 1 : $number(widgets.series_amount);
+                  {"type":"usd","usd": $base * $mult}
+                )
+                """,
             ),
         )
 
@@ -1316,8 +1630,13 @@ class OmniProImageNode(IO.ComfyNode):
         prompt: str,
         resolution: str,
         aspect_ratio: str,
+        series_amount: str = "disabled",
         reference_images: Input.Image | None = None,
+        seed: int = 0,
     ) -> IO.NodeOutput:
+        _ = seed
+        if model_name == "kling-image-o1" and resolution == "4K":
+            raise ValueError("4K resolution is not supported for kling-image-o1 model.")
         prompt = normalize_omni_prompt_references(prompt)
         validate_string(prompt, min_length=1, max_length=2500)
         image_list: list[OmniImageParamImage] = []
@@ -1329,6 +1648,9 @@ class OmniProImageNode(IO.ComfyNode):
                 validate_image_aspect_ratio(i, (1, 2.5), (2.5, 1))
             for i in await upload_images_to_comfyapi(cls, reference_images, wait_label="Uploading reference image"):
                 image_list.append(OmniImageParamImage(image=i))
+        use_series = series_amount != "disabled"
+        if use_series and model_name == "kling-image-o1":
+            raise ValueError("kling-image-o1 does not support series generation.")
         response = await sync_op(
             cls,
             ApiEndpoint(path="/proxy/kling/v1/images/omni-image", method="POST"),
@@ -1339,6 +1661,8 @@ class OmniProImageNode(IO.ComfyNode):
                 resolution=resolution.lower(),
                 aspect_ratio=aspect_ratio,
                 image_list=image_list if image_list else None,
+                result_type="series" if use_series else None,
+                series_amount=int(series_amount) if use_series else None,
             ),
         )
         if response.code:
@@ -1351,7 +1675,9 @@ class OmniProImageNode(IO.ComfyNode):
             response_model=TaskStatusResponse,
             status_extractor=lambda r: (r.data.task_status if r.data else None),
         )
-        return IO.NodeOutput(await download_url_to_image_tensor(final_response.data.task_result.images[0].url))
+        images = final_response.data.task_result.series_images or final_response.data.task_result.images
+        tensors = [await download_url_to_image_tensor(img.url) for img in images]
+        return IO.NodeOutput(torch.cat(tensors, dim=0))
 
 
 class KlingCameraControlT2VNode(IO.ComfyNode):
@@ -2119,7 +2445,7 @@ class KlingImageGenerationNode(IO.ComfyNode):
     def define_schema(cls) -> IO.Schema:
         return IO.Schema(
             node_id="KlingImageGenerationNode",
-            display_name="Kling Image Generation",
+            display_name="Kling 3.0 Image",
             category="api node/image/Kling",
             description="Kling Image Generation Node. Generate an image from a text prompt with an optional reference image.",
             inputs=[
@@ -2147,11 +2473,7 @@ class KlingImageGenerationNode(IO.ComfyNode):
                     display_mode=IO.NumberDisplay.slider,
                     tooltip="Subject reference similarity",
                 ),
-                IO.Combo.Input(
-                    "model_name",
-                    options=[i.value for i in KlingImageGenModelName],
-                    default="kling-v2",
-                ),
+                IO.Combo.Input("model_name", options=["kling-v3", "kling-v2", "kling-v1-5"]),
                 IO.Combo.Input(
                     "aspect_ratio",
                     options=[i.value for i in KlingImageGenAspectRatio],
@@ -2165,6 +2487,17 @@ class KlingImageGenerationNode(IO.ComfyNode):
                     tooltip="Number of generated images",
                 ),
                 IO.Image.Input("image", optional=True),
+                IO.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=2147483647,
+                    display_mode=IO.NumberDisplay.number,
+                    control_after_generate=True,
+                    tooltip="Seed controls whether the node should re-run; "
+                    "results are non-deterministic regardless of seed.",
+                    optional=True,
+                ),
             ],
             outputs=[
                 IO.Image.Output(),
@@ -2183,7 +2516,7 @@ class KlingImageGenerationNode(IO.ComfyNode):
                   $base :=
                     $contains($m,"kling-v1-5")
                       ? (inputs.image.connected ? 0.028 : 0.014)
-                      : ($contains($m,"kling-v1") ? 0.0035 : 0.014);
+                      : $contains($m,"kling-v3") ? 0.028 : 0.014;
                   {"type":"usd","usd": $base * widgets.n}
                 )
                 """,
@@ -2193,7 +2526,7 @@ class KlingImageGenerationNode(IO.ComfyNode):
     @classmethod
     async def execute(
         cls,
-        model_name: KlingImageGenModelName,
+        model_name: str,
         prompt: str,
         negative_prompt: str,
         image_type: KlingImageGenImageReferenceType,
@@ -2202,17 +2535,11 @@ class KlingImageGenerationNode(IO.ComfyNode):
         n: int,
         aspect_ratio: KlingImageGenAspectRatio,
         image: torch.Tensor | None = None,
+        seed: int = 0,
     ) -> IO.NodeOutput:
+        _ = seed
         validate_string(prompt, field_name="prompt", min_length=1, max_length=MAX_PROMPT_LENGTH_IMAGE_GEN)
         validate_string(negative_prompt, field_name="negative_prompt", max_length=MAX_PROMPT_LENGTH_IMAGE_GEN)
-
-        if image is None:
-            image_type = None
-        elif model_name == KlingImageGenModelName.kling_v1:
-            raise ValueError(f"The model {KlingImageGenModelName.kling_v1.value} does not support reference images.")
-        else:
-            image = tensor_to_base64_string(image)
-
         task_creation_response = await sync_op(
             cls,
             ApiEndpoint(path=PATH_IMAGE_GENERATIONS, method="POST"),
@@ -2221,8 +2548,8 @@ class KlingImageGenerationNode(IO.ComfyNode):
                 model_name=model_name,
                 prompt=prompt,
                 negative_prompt=negative_prompt,
-                image=image,
-                image_reference=image_type,
+                image=tensor_to_base64_string(image) if image is not None else None,
+                image_reference=image_type if image is not None else None,
                 image_fidelity=image_fidelity,
                 human_fidelity=human_fidelity,
                 n=n,
@@ -2252,7 +2579,7 @@ class TextToVideoWithAudio(IO.ComfyNode):
     def define_schema(cls) -> IO.Schema:
         return IO.Schema(
             node_id="KlingTextToVideoWithAudio",
-            display_name="Kling Text to Video with Audio",
+            display_name="Kling 2.6 Text to Video with Audio",
             category="api node/video/Kling",
             inputs=[
                 IO.Combo.Input("model_name", options=["kling-v2-6"]),
@@ -2320,7 +2647,7 @@ class ImageToVideoWithAudio(IO.ComfyNode):
     def define_schema(cls) -> IO.Schema:
         return IO.Schema(
             node_id="KlingImageToVideoWithAudio",
-            display_name="Kling Image(First Frame) to Video with Audio",
+            display_name="Kling 2.6 Image(First Frame) to Video with Audio",
             category="api node/video/Kling",
             inputs=[
                 IO.Combo.Input("model_name", options=["kling-v2-6"]),
@@ -2478,6 +2805,335 @@ class MotionControl(IO.ComfyNode):
         return IO.NodeOutput(await download_url_to_video_output(final_response.data.task_result.videos[0].url))
 
 
+class KlingVideoNode(IO.ComfyNode):
+
+    @classmethod
+    def define_schema(cls) -> IO.Schema:
+        return IO.Schema(
+            node_id="KlingVideoNode",
+            display_name="Kling 3.0 Video",
+            category="api node/video/Kling",
+            description="Generate videos with Kling V3. "
+            "Supports text-to-video and image-to-video with optional storyboard multi-prompt and audio generation.",
+            inputs=[
+                IO.DynamicCombo.Input(
+                    "multi_shot",
+                    options=[
+                        IO.DynamicCombo.Option(
+                            "disabled",
+                            [
+                                IO.String.Input("prompt", multiline=True, default=""),
+                                IO.String.Input("negative_prompt", multiline=True, default=""),
+                                IO.Int.Input(
+                                    "duration",
+                                    default=5,
+                                    min=3,
+                                    max=15,
+                                    display_mode=IO.NumberDisplay.slider,
+                                ),
+                            ],
+                        ),
+                        IO.DynamicCombo.Option("1 storyboard", _generate_storyboard_inputs(1)),
+                        IO.DynamicCombo.Option("2 storyboards", _generate_storyboard_inputs(2)),
+                        IO.DynamicCombo.Option("3 storyboards", _generate_storyboard_inputs(3)),
+                        IO.DynamicCombo.Option("4 storyboards", _generate_storyboard_inputs(4)),
+                        IO.DynamicCombo.Option("5 storyboards", _generate_storyboard_inputs(5)),
+                        IO.DynamicCombo.Option("6 storyboards", _generate_storyboard_inputs(6)),
+                    ],
+                    tooltip="Generate a series of video segments with individual prompts and durations.",
+                ),
+                IO.Boolean.Input("generate_audio", default=True),
+                IO.DynamicCombo.Input(
+                    "model",
+                    options=[
+                        IO.DynamicCombo.Option(
+                            "kling-v3",
+                            [
+                                IO.Combo.Input("resolution", options=["1080p", "720p"]),
+                                IO.Combo.Input(
+                                    "aspect_ratio",
+                                    options=["16:9", "9:16", "1:1"],
+                                    tooltip="Ignored in image-to-video mode.",
+                                ),
+                            ],
+                        ),
+                    ],
+                    tooltip="Model and generation settings.",
+                ),
+                IO.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=2147483647,
+                    display_mode=IO.NumberDisplay.number,
+                    control_after_generate=True,
+                    tooltip="Seed controls whether the node should re-run; "
+                            "results are non-deterministic regardless of seed.",
+                ),
+                IO.Image.Input(
+                    "start_frame",
+                    optional=True,
+                    tooltip="Optional start frame image. When connected, switches to image-to-video mode.",
+                ),
+            ],
+            outputs=[
+                IO.Video.Output(),
+            ],
+            hidden=[
+                IO.Hidden.auth_token_comfy_org,
+                IO.Hidden.api_key_comfy_org,
+                IO.Hidden.unique_id,
+            ],
+            is_api_node=True,
+            price_badge=IO.PriceBadge(
+                depends_on=IO.PriceBadgeDepends(
+                    widgets=[
+                        "model.resolution",
+                        "generate_audio",
+                        "multi_shot",
+                        "multi_shot.duration",
+                        "multi_shot.storyboard_1_duration",
+                        "multi_shot.storyboard_2_duration",
+                        "multi_shot.storyboard_3_duration",
+                        "multi_shot.storyboard_4_duration",
+                        "multi_shot.storyboard_5_duration",
+                        "multi_shot.storyboard_6_duration",
+                    ],
+                ),
+                expr="""
+                (
+                  $rates := {"1080p": {"off": 0.112, "on": 0.168}, "720p": {"off": 0.084, "on": 0.126}};
+                  $res := $lookup(widgets, "model.resolution");
+                  $audio := widgets.generate_audio ? "on" : "off";
+                  $rate := $lookup($lookup($rates, $res), $audio);
+                  $ms := widgets.multi_shot;
+                  $isSb := $ms != "disabled";
+                  $n := $isSb ? $number($substring($ms, 0, 1)) : 0;
+                  $d1 := $lookup(widgets, "multi_shot.storyboard_1_duration");
+                  $d2 := $n >= 2 ? $lookup(widgets, "multi_shot.storyboard_2_duration") : 0;
+                  $d3 := $n >= 3 ? $lookup(widgets, "multi_shot.storyboard_3_duration") : 0;
+                  $d4 := $n >= 4 ? $lookup(widgets, "multi_shot.storyboard_4_duration") : 0;
+                  $d5 := $n >= 5 ? $lookup(widgets, "multi_shot.storyboard_5_duration") : 0;
+                  $d6 := $n >= 6 ? $lookup(widgets, "multi_shot.storyboard_6_duration") : 0;
+                  $dur := $isSb ? $d1 + $d2 + $d3 + $d4 + $d5 + $d6 : $lookup(widgets, "multi_shot.duration");
+                  {"type":"usd","usd": $rate * $dur}
+                )
+                """,
+            ),
+        )
+
+    @classmethod
+    async def execute(
+        cls,
+        multi_shot: dict,
+        generate_audio: bool,
+        model: dict,
+        seed: int,
+        start_frame: Input.Image | None = None,
+    ) -> IO.NodeOutput:
+        _ = seed
+        mode = "pro" if model["resolution"] == "1080p" else "std"
+        custom_multi_shot = False
+        if multi_shot["multi_shot"] == "disabled":
+            shot_type = None
+        else:
+            shot_type = "customize"
+            custom_multi_shot = True
+
+        multi_prompt_list = None
+        if shot_type == "customize":
+            count = int(multi_shot["multi_shot"].split()[0])
+            multi_prompt_list = []
+            for i in range(1, count + 1):
+                sb_prompt = multi_shot[f"storyboard_{i}_prompt"]
+                sb_duration = multi_shot[f"storyboard_{i}_duration"]
+                validate_string(sb_prompt, field_name=f"storyboard_{i}_prompt", min_length=1, max_length=512)
+                multi_prompt_list.append(
+                    MultiPromptEntry(
+                        index=i,
+                        prompt=sb_prompt,
+                        duration=str(sb_duration),
+                    )
+                )
+            duration = sum(int(e.duration) for e in multi_prompt_list)
+            if duration < 3 or duration > 15:
+                raise ValueError(
+                    f"Total storyboard duration ({duration}s) must be between 3 and 15 seconds."
+                )
+        else:
+            duration = multi_shot["duration"]
+            validate_string(multi_shot["prompt"], min_length=1, max_length=2500)
+
+        if start_frame is not None:
+            validate_image_dimensions(start_frame, min_width=300, min_height=300)
+            validate_image_aspect_ratio(start_frame, (1, 2.5), (2.5, 1))
+            image_url = await upload_image_to_comfyapi(cls, start_frame, wait_label="Uploading start frame")
+            response = await sync_op(
+                cls,
+                ApiEndpoint(path="/proxy/kling/v1/videos/image2video", method="POST"),
+                response_model=TaskStatusResponse,
+                data=ImageToVideoWithAudioRequest(
+                    model_name=model["model"],
+                    image=image_url,
+                    prompt=None if custom_multi_shot else multi_shot["prompt"],
+                    negative_prompt=None if custom_multi_shot else multi_shot["negative_prompt"],
+                    mode=mode,
+                    duration=str(duration),
+                    sound="on" if generate_audio else "off",
+                    multi_shot=True if shot_type else None,
+                    multi_prompt=multi_prompt_list,
+                    shot_type=shot_type,
+                ),
+            )
+            poll_path = f"/proxy/kling/v1/videos/image2video/{response.data.task_id}"
+        else:
+            response = await sync_op(
+                cls,
+                ApiEndpoint(path="/proxy/kling/v1/videos/text2video", method="POST"),
+                response_model=TaskStatusResponse,
+                data=TextToVideoWithAudioRequest(
+                    model_name=model["model"],
+                    aspect_ratio=model["aspect_ratio"],
+                    prompt=None if custom_multi_shot else multi_shot["prompt"],
+                    negative_prompt=None if custom_multi_shot else multi_shot["negative_prompt"],
+                    mode=mode,
+                    duration=str(duration),
+                    sound="on" if generate_audio else "off",
+                    multi_shot=True if shot_type else None,
+                    multi_prompt=multi_prompt_list,
+                    shot_type=shot_type,
+                ),
+            )
+            poll_path = f"/proxy/kling/v1/videos/text2video/{response.data.task_id}"
+
+        if response.code:
+            raise RuntimeError(
+                f"Kling request failed. Code: {response.code}, Message: {response.message}, Data: {response.data}"
+            )
+        final_response = await poll_op(
+            cls,
+            ApiEndpoint(path=poll_path),
+            response_model=TaskStatusResponse,
+            status_extractor=lambda r: (r.data.task_status if r.data else None),
+        )
+        return IO.NodeOutput(await download_url_to_video_output(final_response.data.task_result.videos[0].url))
+
+
+class KlingFirstLastFrameNode(IO.ComfyNode):
+
+    @classmethod
+    def define_schema(cls) -> IO.Schema:
+        return IO.Schema(
+            node_id="KlingFirstLastFrameNode",
+            display_name="Kling 3.0 First-Last-Frame to Video",
+            category="api node/video/Kling",
+            description="Generate videos with Kling V3 using first and last frames.",
+            inputs=[
+                IO.String.Input("prompt", multiline=True, default=""),
+                IO.Int.Input(
+                    "duration",
+                    default=5,
+                    min=3,
+                    max=15,
+                    display_mode=IO.NumberDisplay.slider,
+                ),
+                IO.Image.Input("first_frame"),
+                IO.Image.Input("end_frame"),
+                IO.Boolean.Input("generate_audio", default=True),
+                IO.DynamicCombo.Input(
+                    "model",
+                    options=[
+                        IO.DynamicCombo.Option(
+                            "kling-v3",
+                            [
+                                IO.Combo.Input("resolution", options=["1080p", "720p"]),
+                            ],
+                        ),
+                    ],
+                    tooltip="Model and generation settings.",
+                ),
+                IO.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=2147483647,
+                    display_mode=IO.NumberDisplay.number,
+                    control_after_generate=True,
+                    tooltip="Seed controls whether the node should re-run; "
+                    "results are non-deterministic regardless of seed.",
+                ),
+            ],
+            outputs=[
+                IO.Video.Output(),
+            ],
+            hidden=[
+                IO.Hidden.auth_token_comfy_org,
+                IO.Hidden.api_key_comfy_org,
+                IO.Hidden.unique_id,
+            ],
+            is_api_node=True,
+            price_badge=IO.PriceBadge(
+                depends_on=IO.PriceBadgeDepends(
+                    widgets=["model.resolution", "generate_audio", "duration"],
+                ),
+                expr="""
+                (
+                  $rates := {"1080p": {"off": 0.112, "on": 0.168}, "720p": {"off": 0.084, "on": 0.126}};
+                  $res := $lookup(widgets, "model.resolution");
+                  $audio := widgets.generate_audio ? "on" : "off";
+                  $rate := $lookup($lookup($rates, $res), $audio);
+                  {"type":"usd","usd": $rate * widgets.duration}
+                )
+                """,
+            ),
+        )
+
+    @classmethod
+    async def execute(
+        cls,
+        prompt: str,
+        duration: int,
+        first_frame: Input.Image,
+        end_frame: Input.Image,
+        generate_audio: bool,
+        model: dict,
+        seed: int,
+    ) -> IO.NodeOutput:
+        _ = seed
+        validate_string(prompt, min_length=1, max_length=2500)
+        validate_image_dimensions(first_frame, min_width=300, min_height=300)
+        validate_image_aspect_ratio(first_frame, (1, 2.5), (2.5, 1))
+        validate_image_dimensions(end_frame, min_width=300, min_height=300)
+        validate_image_aspect_ratio(end_frame, (1, 2.5), (2.5, 1))
+        image_url = await upload_image_to_comfyapi(cls, first_frame, wait_label="Uploading first frame")
+        image_tail_url = await upload_image_to_comfyapi(cls, end_frame, wait_label="Uploading end frame")
+        response = await sync_op(
+            cls,
+            ApiEndpoint(path="/proxy/kling/v1/videos/image2video", method="POST"),
+            response_model=TaskStatusResponse,
+            data=ImageToVideoWithAudioRequest(
+                model_name=model["model"],
+                image=image_url,
+                image_tail=image_tail_url,
+                prompt=prompt,
+                mode="pro" if model["resolution"] == "1080p" else "std",
+                duration=str(duration),
+                sound="on" if generate_audio else "off",
+            ),
+        )
+        if response.code:
+            raise RuntimeError(
+                f"Kling request failed. Code: {response.code}, Message: {response.message}, Data: {response.data}"
+            )
+        final_response = await poll_op(
+            cls,
+            ApiEndpoint(path=f"/proxy/kling/v1/videos/image2video/{response.data.task_id}"),
+            response_model=TaskStatusResponse,
+            status_extractor=lambda r: (r.data.task_status if r.data else None),
+        )
+        return IO.NodeOutput(await download_url_to_video_output(final_response.data.task_result.videos[0].url))
+
+
 class KlingExtension(ComfyExtension):
     @override
     async def get_node_list(self) -> list[type[IO.ComfyNode]]:
@@ -2504,6 +3160,8 @@ class KlingExtension(ComfyExtension):
             TextToVideoWithAudio,
             ImageToVideoWithAudio,
             MotionControl,
+            KlingVideoNode,
+            KlingFirstLastFrameNode,
         ]
 
 
diff --git a/comfy_api_nodes/nodes_magnific.py b/comfy_api_nodes/nodes_magnific.py
index 013e71cc8..83a581c5d 100644
--- a/comfy_api_nodes/nodes_magnific.py
+++ b/comfy_api_nodes/nodes_magnific.py
@@ -30,6 +30,30 @@ from comfy_api_nodes.util import (
     validate_image_dimensions,
 )
 
+_EUR_TO_USD = 1.19
+
+
+def _tier_price_eur(megapixels: float) -> float:
+    """Price in EUR for a single Magnific upscaling step based on input megapixels."""
+    if megapixels <= 1.3:
+        return 0.143
+    if megapixels <= 3.0:
+        return 0.286
+    if megapixels <= 6.4:
+        return 0.429
+    return 1.716
+
+
+def _calculate_magnific_upscale_price_usd(width: int, height: int, scale: int) -> float:
+    """Calculate total Magnific upscale price in USD for given input dimensions and scale factor."""
+    num_steps = int(math.log2(scale))
+    total_eur = 0.0
+    pixels = width * height
+    for _ in range(num_steps):
+        total_eur += _tier_price_eur(pixels / 1_000_000)
+        pixels *= 4
+    return round(total_eur * _EUR_TO_USD, 2)
+
 
 class MagnificImageUpscalerCreativeNode(IO.ComfyNode):
     @classmethod
@@ -103,11 +127,20 @@ class MagnificImageUpscalerCreativeNode(IO.ComfyNode):
             ],
             is_api_node=True,
             price_badge=IO.PriceBadge(
-                depends_on=IO.PriceBadgeDepends(widgets=["scale_factor"]),
+                depends_on=IO.PriceBadgeDepends(widgets=["scale_factor", "auto_downscale"]),
                 expr="""
                 (
-                  $max := widgets.scale_factor = "2x" ? 1.326 : 1.657;
-                  {"type": "range_usd", "min_usd": 0.11, "max_usd": $max}
+                  $ad := widgets.auto_downscale;
+                  $mins := $ad
+                    ? {"2x": 0.172, "4x": 0.343, "8x": 0.515, "16x": 0.515}
+                    : {"2x": 0.172, "4x": 0.343, "8x": 0.515, "16x": 0.844};
+                  $maxs := {"2x": 0.515, "4x": 0.844, "8x": 1.015, "16x": 1.187};
+                  {
+                    "type": "range_usd",
+                    "min_usd": $lookup($mins, widgets.scale_factor),
+                    "max_usd": $lookup($maxs, widgets.scale_factor),
+                    "format": { "approximate": true }
+                  }
                 )
                 """,
             ),
@@ -168,6 +201,10 @@ class MagnificImageUpscalerCreativeNode(IO.ComfyNode):
                     f"Use a smaller input image or lower scale factor."
                 )
 
+        final_height, final_width = get_image_dimensions(image)
+        actual_scale = int(scale_factor.rstrip("x"))
+        price_usd = _calculate_magnific_upscale_price_usd(final_width, final_height, actual_scale)
+
         initial_res = await sync_op(
             cls,
             ApiEndpoint(path="/proxy/freepik/v1/ai/image-upscaler", method="POST"),
@@ -189,6 +226,7 @@ class MagnificImageUpscalerCreativeNode(IO.ComfyNode):
             ApiEndpoint(path=f"/proxy/freepik/v1/ai/image-upscaler/{initial_res.task_id}"),
             response_model=TaskResponse,
             status_extractor=lambda x: x.status,
+            price_extractor=lambda _: price_usd,
             poll_interval=10.0,
             max_poll_attempts=480,
         )
@@ -257,8 +295,14 @@ class MagnificImageUpscalerPreciseV2Node(IO.ComfyNode):
                 depends_on=IO.PriceBadgeDepends(widgets=["scale_factor"]),
                 expr="""
                 (
-                  $max := widgets.scale_factor = "2x" ? 1.326 : 1.657;
-                  {"type": "range_usd", "min_usd": 0.11, "max_usd": $max}
+                  $mins := {"2x": 0.172, "4x": 0.343, "8x": 0.515, "16x": 0.844};
+                  $maxs := {"2x": 2.045, "4x": 2.545, "8x": 2.889, "16x": 3.06};
+                  {
+                    "type": "range_usd",
+                    "min_usd": $lookup($mins, widgets.scale_factor),
+                    "max_usd": $lookup($maxs, widgets.scale_factor),
+                    "format": { "approximate": true }
+                  }
                 )
                 """,
             ),
@@ -321,6 +365,9 @@ class MagnificImageUpscalerPreciseV2Node(IO.ComfyNode):
                     f"Use a smaller input image or lower scale factor."
                 )
 
+        final_height, final_width = get_image_dimensions(image)
+        price_usd = _calculate_magnific_upscale_price_usd(final_width, final_height, requested_scale)
+
         initial_res = await sync_op(
             cls,
             ApiEndpoint(path="/proxy/freepik/v1/ai/image-upscaler-precision-v2", method="POST"),
@@ -339,6 +386,7 @@ class MagnificImageUpscalerPreciseV2Node(IO.ComfyNode):
             ApiEndpoint(path=f"/proxy/freepik/v1/ai/image-upscaler-precision-v2/{initial_res.task_id}"),
             response_model=TaskResponse,
             status_extractor=lambda x: x.status,
+            price_extractor=lambda _: price_usd,
             poll_interval=10.0,
             max_poll_attempts=480,
         )
@@ -877,8 +925,8 @@ class MagnificExtension(ComfyExtension):
     @override
     async def get_node_list(self) -> list[type[IO.ComfyNode]]:
         return [
-            # MagnificImageUpscalerCreativeNode,
-            # MagnificImageUpscalerPreciseV2Node,
+            MagnificImageUpscalerCreativeNode,
+            MagnificImageUpscalerPreciseV2Node,
             MagnificImageStyleTransferNode,
             MagnificImageRelightNode,
             MagnificImageSkinEnhancerNode,
diff --git a/comfy_api_nodes/nodes_moonvalley.py b/comfy_api_nodes/nodes_moonvalley.py
index 08315fa2b..78a230529 100644
--- a/comfy_api_nodes/nodes_moonvalley.py
+++ b/comfy_api_nodes/nodes_moonvalley.py
@@ -219,8 +219,8 @@ class MoonvalleyImg2VideoNode(IO.ComfyNode):
                 ),
                 IO.Int.Input(
                     "steps",
-                    default=33,
-                    min=1,
+                    default=80,
+                    min=75,  # steps should be greater or equal to cooldown_steps(75) + warmup_steps(0)
                     max=100,
                     step=1,
                     tooltip="Number of denoising steps",
@@ -340,8 +340,8 @@ class MoonvalleyVideo2VideoNode(IO.ComfyNode):
                 ),
                 IO.Int.Input(
                     "steps",
-                    default=33,
-                    min=1,
+                    default=60,
+                    min=60,  # steps should be greater or equal to cooldown_steps(36) + warmup_steps(24)
                     max=100,
                     step=1,
                     display_mode=IO.NumberDisplay.number,
@@ -370,7 +370,7 @@ class MoonvalleyVideo2VideoNode(IO.ComfyNode):
         video: Input.Video | None = None,
         control_type: str = "Motion Transfer",
         motion_intensity: int | None = 100,
-        steps=33,
+        steps=60,
         prompt_adherence=4.5,
     ) -> IO.NodeOutput:
         validated_video = validate_video_to_video_input(video)
@@ -465,8 +465,8 @@ class MoonvalleyTxt2VideoNode(IO.ComfyNode):
                 ),
                 IO.Int.Input(
                     "steps",
-                    default=33,
-                    min=1,
+                    default=80,
+                    min=75,  # steps should be greater or equal to cooldown_steps(75) + warmup_steps(0)
                     max=100,
                     step=1,
                     tooltip="Inference steps",
diff --git a/comfy_api_nodes/nodes_openai.py b/comfy_api_nodes/nodes_openai.py
index f05aaab7b..332107a82 100644
--- a/comfy_api_nodes/nodes_openai.py
+++ b/comfy_api_nodes/nodes_openai.py
@@ -43,7 +43,6 @@ class SupportedOpenAIModel(str, Enum):
     o1 = "o1"
     o3 = "o3"
     o1_pro = "o1-pro"
-    gpt_4o = "gpt-4o"
     gpt_4_1 = "gpt-4.1"
     gpt_4_1_mini = "gpt-4.1-mini"
     gpt_4_1_nano = "gpt-4.1-nano"
@@ -649,11 +648,6 @@ class OpenAIChatNode(IO.ComfyNode):
                     "usd": [0.01, 0.04],
                     "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" }
                   }
-                  : $contains($m, "gpt-4o") ? {
-                    "type": "list_usd",
-                    "usd": [0.0025, 0.01],
-                    "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" }
-                  }
                   : $contains($m, "gpt-4.1-nano") ? {
                     "type": "list_usd",
                     "usd": [0.0001, 0.0004],
diff --git a/comfy_api_nodes/util/__init__.py b/comfy_api_nodes/util/__init__.py
index 18b020eef..f8a0ba8af 100644
--- a/comfy_api_nodes/util/__init__.py
+++ b/comfy_api_nodes/util/__init__.py
@@ -33,6 +33,7 @@ from .download_helpers import (
     download_url_to_video_output,
 )
 from .upload_helpers import (
+    upload_3d_model_to_comfyapi,
     upload_audio_to_comfyapi,
     upload_file_to_comfyapi,
     upload_image_to_comfyapi,
@@ -62,6 +63,7 @@ __all__ = [
     "sync_op",
     "sync_op_raw",
     # Upload helpers
+    "upload_3d_model_to_comfyapi",
     "upload_audio_to_comfyapi",
     "upload_file_to_comfyapi",
     "upload_image_to_comfyapi",
diff --git a/comfy_api_nodes/util/client.py b/comfy_api_nodes/util/client.py
index 8a1259506..94886af7b 100644
--- a/comfy_api_nodes/util/client.py
+++ b/comfy_api_nodes/util/client.py
@@ -57,6 +57,7 @@ class _RequestConfig:
     files: dict[str, Any] | list[tuple[str, Any]] | None
     multipart_parser: Callable | None
     max_retries: int
+    max_retries_on_rate_limit: int
     retry_delay: float
     retry_backoff: float
     wait_label: str = "Waiting"
@@ -65,6 +66,7 @@ class _RequestConfig:
     final_label_on_success: str | None = "Completed"
     progress_origin_ts: float | None = None
     price_extractor: Callable[[dict[str, Any]], float | None] | None = None
+    is_rate_limited: Callable[[int, Any], bool] | None = None
 
 
 @dataclass
@@ -78,7 +80,7 @@ class _PollUIState:
     active_since: float | None = None  # start time of current active interval (None if queued)
 
 
-_RETRY_STATUS = {408, 429, 500, 502, 503, 504}
+_RETRY_STATUS = {408, 500, 502, 503, 504}  # status 429 is handled separately
 COMPLETED_STATUSES = ["succeeded", "succeed", "success", "completed", "finished", "done", "complete"]
 FAILED_STATUSES = ["cancelled", "canceled", "canceling", "fail", "failed", "error"]
 QUEUED_STATUSES = ["created", "queued", "queueing", "submitted", "initializing"]
@@ -103,6 +105,8 @@ async def sync_op(
     final_label_on_success: str | None = "Completed",
     progress_origin_ts: float | None = None,
     monitor_progress: bool = True,
+    max_retries_on_rate_limit: int = 16,
+    is_rate_limited: Callable[[int, Any], bool] | None = None,
 ) -> M:
     raw = await sync_op_raw(
         cls,
@@ -122,6 +126,8 @@ async def sync_op(
         final_label_on_success=final_label_on_success,
         progress_origin_ts=progress_origin_ts,
         monitor_progress=monitor_progress,
+        max_retries_on_rate_limit=max_retries_on_rate_limit,
+        is_rate_limited=is_rate_limited,
     )
     if not isinstance(raw, dict):
         raise Exception("Expected JSON response to validate into a Pydantic model, got non-JSON (binary or text).")
@@ -143,9 +149,9 @@ async def poll_op(
     poll_interval: float = 5.0,
     max_poll_attempts: int = 160,
     timeout_per_poll: float = 120.0,
-    max_retries_per_poll: int = 3,
+    max_retries_per_poll: int = 10,
     retry_delay_per_poll: float = 1.0,
-    retry_backoff_per_poll: float = 2.0,
+    retry_backoff_per_poll: float = 1.4,
     estimated_duration: int | None = None,
     cancel_endpoint: ApiEndpoint | None = None,
     cancel_timeout: float = 10.0,
@@ -194,6 +200,8 @@ async def sync_op_raw(
     final_label_on_success: str | None = "Completed",
     progress_origin_ts: float | None = None,
     monitor_progress: bool = True,
+    max_retries_on_rate_limit: int = 16,
+    is_rate_limited: Callable[[int, Any], bool] | None = None,
 ) -> dict[str, Any] | bytes:
     """
     Make a single network request.
@@ -222,6 +230,8 @@ async def sync_op_raw(
         final_label_on_success=final_label_on_success,
         progress_origin_ts=progress_origin_ts,
         price_extractor=price_extractor,
+        max_retries_on_rate_limit=max_retries_on_rate_limit,
+        is_rate_limited=is_rate_limited,
     )
     return await _request_base(cfg, expect_binary=as_binary)
 
@@ -240,9 +250,9 @@ async def poll_op_raw(
     poll_interval: float = 5.0,
     max_poll_attempts: int = 160,
     timeout_per_poll: float = 120.0,
-    max_retries_per_poll: int = 3,
+    max_retries_per_poll: int = 10,
     retry_delay_per_poll: float = 1.0,
-    retry_backoff_per_poll: float = 2.0,
+    retry_backoff_per_poll: float = 1.4,
     estimated_duration: int | None = None,
     cancel_endpoint: ApiEndpoint | None = None,
     cancel_timeout: float = 10.0,
@@ -506,7 +516,7 @@ def _friendly_http_message(status: int, body: Any) -> str:
     if status == 409:
         return "There is a problem with your account. Please contact support@comfy.org."
     if status == 429:
-        return "Rate Limit Exceeded: Please try again later."
+        return "Rate Limit Exceeded: The server returned 429 after all retry attempts. Please wait and try again."
     try:
         if isinstance(body, dict):
             err = body.get("error")
@@ -586,6 +596,8 @@ async def _request_base(cfg: _RequestConfig, expect_binary: bool):
     start_time = cfg.progress_origin_ts if cfg.progress_origin_ts is not None else time.monotonic()
     attempt = 0
     delay = cfg.retry_delay
+    rate_limit_attempts = 0
+    rate_limit_delay = cfg.retry_delay
     operation_succeeded: bool = False
     final_elapsed_seconds: int | None = None
     extracted_price: float | None = None
@@ -653,17 +665,14 @@ async def _request_base(cfg: _RequestConfig, expect_binary: bool):
                 payload_headers["Content-Type"] = "application/json"
                 payload_kw["json"] = cfg.data or {}
 
-            try:
-                request_logger.log_request_response(
-                    operation_id=operation_id,
-                    request_method=method,
-                    request_url=url,
-                    request_headers=dict(payload_headers) if payload_headers else None,
-                    request_params=dict(params) if params else None,
-                    request_data=request_body_log,
-                )
-            except Exception as _log_e:
-                logging.debug("[DEBUG] request logging failed: %s", _log_e)
+            request_logger.log_request_response(
+                operation_id=operation_id,
+                request_method=method,
+                request_url=url,
+                request_headers=dict(payload_headers) if payload_headers else None,
+                request_params=dict(params) if params else None,
+                request_data=request_body_log,
+            )
 
             req_coro = sess.request(method, url, params=params, **payload_kw)
             req_task = asyncio.create_task(req_coro)
@@ -688,41 +697,33 @@ async def _request_base(cfg: _RequestConfig, expect_binary: bool):
                         body = await resp.json()
                     except (ContentTypeError, json.JSONDecodeError):
                         body = await resp.text()
-                    if resp.status in _RETRY_STATUS and attempt <= cfg.max_retries:
+                    should_retry = False
+                    wait_time = 0.0
+                    retry_label = ""
+                    is_rl = resp.status == 429 or (
+                        cfg.is_rate_limited is not None and cfg.is_rate_limited(resp.status, body)
+                    )
+                    if is_rl and rate_limit_attempts < cfg.max_retries_on_rate_limit:
+                        rate_limit_attempts += 1
+                        wait_time = min(rate_limit_delay, 30.0)
+                        rate_limit_delay *= cfg.retry_backoff
+                        retry_label = f"rate-limit retry {rate_limit_attempts} of {cfg.max_retries_on_rate_limit}"
+                        should_retry = True
+                    elif resp.status in _RETRY_STATUS and (attempt - rate_limit_attempts) <= cfg.max_retries:
+                        wait_time = delay
+                        delay *= cfg.retry_backoff
+                        retry_label = f"retry {attempt - rate_limit_attempts} of {cfg.max_retries}"
+                        should_retry = True
+
+                    if should_retry:
                         logging.warning(
-                            "HTTP %s %s -> %s. Retrying in %.2fs (retry %d of %d).",
+                            "HTTP %s %s -> %s. Waiting %.2fs (%s).",
                             method,
                             url,
                             resp.status,
-                            delay,
-                            attempt,
-                            cfg.max_retries,
+                            wait_time,
+                            retry_label,
                         )
-                        try:
-                            request_logger.log_request_response(
-                                operation_id=operation_id,
-                                request_method=method,
-                                request_url=url,
-                                response_status_code=resp.status,
-                                response_headers=dict(resp.headers),
-                                response_content=body,
-                                error_message=_friendly_http_message(resp.status, body),
-                            )
-                        except Exception as _log_e:
-                            logging.debug("[DEBUG] response logging failed: %s", _log_e)
-
-                        await sleep_with_interrupt(
-                            delay,
-                            cfg.node_cls,
-                            cfg.wait_label if cfg.monitor_progress else None,
-                            start_time if cfg.monitor_progress else None,
-                            cfg.estimated_total,
-                            display_callback=_display_time_progress if cfg.monitor_progress else None,
-                        )
-                        delay *= cfg.retry_backoff
-                        continue
-                    msg = _friendly_http_message(resp.status, body)
-                    try:
                         request_logger.log_request_response(
                             operation_id=operation_id,
                             request_method=method,
@@ -730,10 +731,27 @@ async def _request_base(cfg: _RequestConfig, expect_binary: bool):
                             response_status_code=resp.status,
                             response_headers=dict(resp.headers),
                             response_content=body,
-                            error_message=msg,
+                            error_message=f"HTTP {resp.status} ({retry_label}, will retry in {wait_time:.1f}s)",
                         )
-                    except Exception as _log_e:
-                        logging.debug("[DEBUG] response logging failed: %s", _log_e)
+                        await sleep_with_interrupt(
+                            wait_time,
+                            cfg.node_cls,
+                            cfg.wait_label if cfg.monitor_progress else None,
+                            start_time if cfg.monitor_progress else None,
+                            cfg.estimated_total,
+                            display_callback=_display_time_progress if cfg.monitor_progress else None,
+                        )
+                        continue
+                    msg = _friendly_http_message(resp.status, body)
+                    request_logger.log_request_response(
+                        operation_id=operation_id,
+                        request_method=method,
+                        request_url=url,
+                        response_status_code=resp.status,
+                        response_headers=dict(resp.headers),
+                        response_content=body,
+                        error_message=msg,
+                    )
                     raise Exception(msg)
 
                 if expect_binary:
@@ -753,17 +771,14 @@ async def _request_base(cfg: _RequestConfig, expect_binary: bool):
                     bytes_payload = bytes(buff)
                     operation_succeeded = True
                     final_elapsed_seconds = int(time.monotonic() - start_time)
-                    try:
-                        request_logger.log_request_response(
-                            operation_id=operation_id,
-                            request_method=method,
-                            request_url=url,
-                            response_status_code=resp.status,
-                            response_headers=dict(resp.headers),
-                            response_content=bytes_payload,
-                        )
-                    except Exception as _log_e:
-                        logging.debug("[DEBUG] response logging failed: %s", _log_e)
+                    request_logger.log_request_response(
+                        operation_id=operation_id,
+                        request_method=method,
+                        request_url=url,
+                        response_status_code=resp.status,
+                        response_headers=dict(resp.headers),
+                        response_content=bytes_payload,
+                    )
                     return bytes_payload
                 else:
                     try:
@@ -780,45 +795,39 @@ async def _request_base(cfg: _RequestConfig, expect_binary: bool):
                         extracted_price = cfg.price_extractor(payload) if cfg.price_extractor else None
                     operation_succeeded = True
                     final_elapsed_seconds = int(time.monotonic() - start_time)
-                    try:
-                        request_logger.log_request_response(
-                            operation_id=operation_id,
-                            request_method=method,
-                            request_url=url,
-                            response_status_code=resp.status,
-                            response_headers=dict(resp.headers),
-                            response_content=response_content_to_log,
-                        )
-                    except Exception as _log_e:
-                        logging.debug("[DEBUG] response logging failed: %s", _log_e)
+                    request_logger.log_request_response(
+                        operation_id=operation_id,
+                        request_method=method,
+                        request_url=url,
+                        response_status_code=resp.status,
+                        response_headers=dict(resp.headers),
+                        response_content=response_content_to_log,
+                    )
                     return payload
 
         except ProcessingInterrupted:
             logging.debug("Polling was interrupted by user")
             raise
         except (ClientError, OSError) as e:
-            if attempt <= cfg.max_retries:
+            if (attempt - rate_limit_attempts) <= cfg.max_retries:
                 logging.warning(
                     "Connection error calling %s %s. Retrying in %.2fs (%d/%d): %s",
                     method,
                     url,
                     delay,
-                    attempt,
+                    attempt - rate_limit_attempts,
                     cfg.max_retries,
                     str(e),
                 )
-                try:
-                    request_logger.log_request_response(
-                        operation_id=operation_id,
-                        request_method=method,
-                        request_url=url,
-                        request_headers=dict(payload_headers) if payload_headers else None,
-                        request_params=dict(params) if params else None,
-                        request_data=request_body_log,
-                        error_message=f"{type(e).__name__}: {str(e)} (will retry)",
-                    )
-                except Exception as _log_e:
-                    logging.debug("[DEBUG] request error logging failed: %s", _log_e)
+                request_logger.log_request_response(
+                    operation_id=operation_id,
+                    request_method=method,
+                    request_url=url,
+                    request_headers=dict(payload_headers) if payload_headers else None,
+                    request_params=dict(params) if params else None,
+                    request_data=request_body_log,
+                    error_message=f"{type(e).__name__}: {str(e)} (will retry)",
+                )
                 await sleep_with_interrupt(
                     delay,
                     cfg.node_cls,
@@ -831,23 +840,6 @@ async def _request_base(cfg: _RequestConfig, expect_binary: bool):
                 continue
             diag = await _diagnose_connectivity()
             if not diag["internet_accessible"]:
-                try:
-                    request_logger.log_request_response(
-                        operation_id=operation_id,
-                        request_method=method,
-                        request_url=url,
-                        request_headers=dict(payload_headers) if payload_headers else None,
-                        request_params=dict(params) if params else None,
-                        request_data=request_body_log,
-                        error_message=f"LocalNetworkError: {str(e)}",
-                    )
-                except Exception as _log_e:
-                    logging.debug("[DEBUG] final error logging failed: %s", _log_e)
-                raise LocalNetworkError(
-                    "Unable to connect to the API server due to local network issues. "
-                    "Please check your internet connection and try again."
-                ) from e
-            try:
                 request_logger.log_request_response(
                     operation_id=operation_id,
                     request_method=method,
@@ -855,10 +847,21 @@ async def _request_base(cfg: _RequestConfig, expect_binary: bool):
                     request_headers=dict(payload_headers) if payload_headers else None,
                     request_params=dict(params) if params else None,
                     request_data=request_body_log,
-                    error_message=f"ApiServerError: {str(e)}",
+                    error_message=f"LocalNetworkError: {str(e)}",
                 )
-            except Exception as _log_e:
-                logging.debug("[DEBUG] final error logging failed: %s", _log_e)
+                raise LocalNetworkError(
+                    "Unable to connect to the API server due to local network issues. "
+                    "Please check your internet connection and try again."
+                ) from e
+            request_logger.log_request_response(
+                operation_id=operation_id,
+                request_method=method,
+                request_url=url,
+                request_headers=dict(payload_headers) if payload_headers else None,
+                request_params=dict(params) if params else None,
+                request_data=request_body_log,
+                error_message=f"ApiServerError: {str(e)}",
+            )
             raise ApiServerError(
                 f"The API server at {default_base_url()} is currently unreachable. "
                 f"The service may be experiencing issues."
diff --git a/comfy_api_nodes/util/conversions.py b/comfy_api_nodes/util/conversions.py
index 3e37e8a8c..82b6d22a5 100644
--- a/comfy_api_nodes/util/conversions.py
+++ b/comfy_api_nodes/util/conversions.py
@@ -57,7 +57,7 @@ def tensor_to_bytesio(
     image: torch.Tensor,
     *,
     total_pixels: int | None = 2048 * 2048,
-    mime_type: str = "image/png",
+    mime_type: str | None = "image/png",
 ) -> BytesIO:
     """Converts a torch.Tensor image to a named BytesIO object.
 
diff --git a/comfy_api_nodes/util/download_helpers.py b/comfy_api_nodes/util/download_helpers.py
index 78bcf1fa1..aa588d038 100644
--- a/comfy_api_nodes/util/download_helpers.py
+++ b/comfy_api_nodes/util/download_helpers.py
@@ -167,27 +167,25 @@ async def download_url_to_bytesio(
                     with contextlib.suppress(Exception):
                         dest.seek(0)
 
-                with contextlib.suppress(Exception):
-                    request_logger.log_request_response(
-                        operation_id=op_id,
-                        request_method="GET",
-                        request_url=url,
-                        response_status_code=resp.status,
-                        response_headers=dict(resp.headers),
-                        response_content=f"[streamed {written} bytes to dest]",
-                    )
+                request_logger.log_request_response(
+                    operation_id=op_id,
+                    request_method="GET",
+                    request_url=url,
+                    response_status_code=resp.status,
+                    response_headers=dict(resp.headers),
+                    response_content=f"[streamed {written} bytes to dest]",
+                )
                 return
         except asyncio.CancelledError:
             raise ProcessingInterrupted("Task cancelled") from None
         except (ClientError, OSError) as e:
             if attempt <= max_retries:
-                with contextlib.suppress(Exception):
-                    request_logger.log_request_response(
-                        operation_id=op_id,
-                        request_method="GET",
-                        request_url=url,
-                        error_message=f"{type(e).__name__}: {str(e)} (will retry)",
-                    )
+                request_logger.log_request_response(
+                    operation_id=op_id,
+                    request_method="GET",
+                    request_url=url,
+                    error_message=f"{type(e).__name__}: {str(e)} (will retry)",
+                )
                 await sleep_with_interrupt(delay, cls, None, None, None)
                 delay *= retry_backoff
                 continue
diff --git a/comfy_api_nodes/util/request_logger.py b/comfy_api_nodes/util/request_logger.py
index e0cb4428d..fe0543d9b 100644
--- a/comfy_api_nodes/util/request_logger.py
+++ b/comfy_api_nodes/util/request_logger.py
@@ -8,7 +8,6 @@ from typing import Any
 
 import folder_paths
 
-# Get the logger instance
 logger = logging.getLogger(__name__)
 
 
@@ -91,38 +90,41 @@ def log_request_response(
     Filenames are sanitized and length-limited for cross-platform safety.
     If we still fail to write, we fall back to appending into api.log.
     """
-    log_dir = get_log_directory()
-    filepath = _build_log_filepath(log_dir, operation_id, request_url)
-
-    log_content: list[str] = []
-    log_content.append(f"Timestamp: {datetime.datetime.now().isoformat()}")
-    log_content.append(f"Operation ID: {operation_id}")
-    log_content.append("-" * 30 + " REQUEST " + "-" * 30)
-    log_content.append(f"Method: {request_method}")
-    log_content.append(f"URL: {request_url}")
-    if request_headers:
-        log_content.append(f"Headers:\n{_format_data_for_logging(request_headers)}")
-    if request_params:
-        log_content.append(f"Params:\n{_format_data_for_logging(request_params)}")
-    if request_data is not None:
-        log_content.append(f"Data/Body:\n{_format_data_for_logging(request_data)}")
-
-    log_content.append("\n" + "-" * 30 + " RESPONSE " + "-" * 30)
-    if response_status_code is not None:
-        log_content.append(f"Status Code: {response_status_code}")
-    if response_headers:
-        log_content.append(f"Headers:\n{_format_data_for_logging(response_headers)}")
-    if response_content is not None:
-        log_content.append(f"Content:\n{_format_data_for_logging(response_content)}")
-    if error_message:
-        log_content.append(f"Error:\n{error_message}")
-
     try:
-        with open(filepath, "w", encoding="utf-8") as f:
-            f.write("\n".join(log_content))
-        logger.debug("API log saved to: %s", filepath)
-    except Exception as e:
-        logger.error("Error writing API log to %s: %s", filepath, str(e))
+        log_dir = get_log_directory()
+        filepath = _build_log_filepath(log_dir, operation_id, request_url)
+
+        log_content: list[str] = []
+        log_content.append(f"Timestamp: {datetime.datetime.now().isoformat()}")
+        log_content.append(f"Operation ID: {operation_id}")
+        log_content.append("-" * 30 + " REQUEST " + "-" * 30)
+        log_content.append(f"Method: {request_method}")
+        log_content.append(f"URL: {request_url}")
+        if request_headers:
+            log_content.append(f"Headers:\n{_format_data_for_logging(request_headers)}")
+        if request_params:
+            log_content.append(f"Params:\n{_format_data_for_logging(request_params)}")
+        if request_data is not None:
+            log_content.append(f"Data/Body:\n{_format_data_for_logging(request_data)}")
+
+        log_content.append("\n" + "-" * 30 + " RESPONSE " + "-" * 30)
+        if response_status_code is not None:
+            log_content.append(f"Status Code: {response_status_code}")
+        if response_headers:
+            log_content.append(f"Headers:\n{_format_data_for_logging(response_headers)}")
+        if response_content is not None:
+            log_content.append(f"Content:\n{_format_data_for_logging(response_content)}")
+        if error_message:
+            log_content.append(f"Error:\n{error_message}")
+
+        try:
+            with open(filepath, "w", encoding="utf-8") as f:
+                f.write("\n".join(log_content))
+            logger.debug("API log saved to: %s", filepath)
+        except Exception as e:
+            logger.error("Error writing API log to %s: %s", filepath, str(e))
+    except Exception as _log_e:
+        logging.debug("[DEBUG] log_request_response failed: %s", _log_e)
 
 
 if __name__ == '__main__':
diff --git a/comfy_api_nodes/util/upload_helpers.py b/comfy_api_nodes/util/upload_helpers.py
index 83d936ce1..6d1d107a1 100644
--- a/comfy_api_nodes/util/upload_helpers.py
+++ b/comfy_api_nodes/util/upload_helpers.py
@@ -164,6 +164,27 @@ async def upload_video_to_comfyapi(
     return await upload_file_to_comfyapi(cls, video_bytes_io, filename, upload_mime_type, wait_label)
 
 
+_3D_MIME_TYPES = {
+    "glb": "model/gltf-binary",
+    "obj": "model/obj",
+    "fbx": "application/octet-stream",
+}
+
+
+async def upload_3d_model_to_comfyapi(
+    cls: type[IO.ComfyNode],
+    model_3d: Types.File3D,
+    file_format: str,
+) -> str:
+    """Uploads a 3D model file to ComfyUI API and returns its download URL."""
+    return await upload_file_to_comfyapi(
+        cls,
+        model_3d.get_data(),
+        f"{uuid.uuid4()}.{file_format}",
+        _3D_MIME_TYPES.get(file_format, "application/octet-stream"),
+    )
+
+
 async def upload_file_to_comfyapi(
     cls: type[IO.ComfyNode],
     file_bytes_io: BytesIO,
@@ -255,17 +276,14 @@ async def upload_file(
         monitor_task = asyncio.create_task(_monitor())
         sess: aiohttp.ClientSession | None = None
         try:
-            try:
-                request_logger.log_request_response(
-                    operation_id=operation_id,
-                    request_method="PUT",
-                    request_url=upload_url,
-                    request_headers=headers or None,
-                    request_params=None,
-                    request_data=f"[File data {len(data)} bytes]",
-                )
-            except Exception as e:
-                logging.debug("[DEBUG] upload request logging failed: %s", e)
+            request_logger.log_request_response(
+                operation_id=operation_id,
+                request_method="PUT",
+                request_url=upload_url,
+                request_headers=headers or None,
+                request_params=None,
+                request_data=f"[File data {len(data)} bytes]",
+            )
 
             sess = aiohttp.ClientSession(timeout=timeout)
             req = sess.put(upload_url, data=data, headers=headers, skip_auto_headers=skip_auto_headers)
@@ -311,31 +329,27 @@ async def upload_file(
                         delay *= retry_backoff
                         continue
                     raise Exception(f"Failed to upload (HTTP {resp.status}).")
-                try:
-                    request_logger.log_request_response(
-                        operation_id=operation_id,
-                        request_method="PUT",
-                        request_url=upload_url,
-                        response_status_code=resp.status,
-                        response_headers=dict(resp.headers),
-                        response_content="File uploaded successfully.",
-                    )
-                except Exception as e:
-                    logging.debug("[DEBUG] upload response logging failed: %s", e)
+                request_logger.log_request_response(
+                    operation_id=operation_id,
+                    request_method="PUT",
+                    request_url=upload_url,
+                    response_status_code=resp.status,
+                    response_headers=dict(resp.headers),
+                    response_content="File uploaded successfully.",
+                )
                 return
         except asyncio.CancelledError:
             raise ProcessingInterrupted("Task cancelled") from None
         except (aiohttp.ClientError, OSError) as e:
             if attempt <= max_retries:
-                with contextlib.suppress(Exception):
-                    request_logger.log_request_response(
-                        operation_id=operation_id,
-                        request_method="PUT",
-                        request_url=upload_url,
-                        request_headers=headers or None,
-                        request_data=f"[File data {len(data)} bytes]",
-                        error_message=f"{type(e).__name__}: {str(e)} (will retry)",
-                    )
+                request_logger.log_request_response(
+                    operation_id=operation_id,
+                    request_method="PUT",
+                    request_url=upload_url,
+                    request_headers=headers or None,
+                    request_data=f"[File data {len(data)} bytes]",
+                    error_message=f"{type(e).__name__}: {str(e)} (will retry)",
+                )
                 await sleep_with_interrupt(
                     delay,
                     cls,
diff --git a/comfy_execution/jobs.py b/comfy_execution/jobs.py
index bf091a448..370014fb6 100644
--- a/comfy_execution/jobs.py
+++ b/comfy_execution/jobs.py
@@ -20,10 +20,60 @@ class JobStatus:
 
 
 # Media types that can be previewed in the frontend
-PREVIEWABLE_MEDIA_TYPES = frozenset({'images', 'video', 'audio'})
+PREVIEWABLE_MEDIA_TYPES = frozenset({'images', 'video', 'audio', '3d'})
 
 # 3D file extensions for preview fallback (no dedicated media_type exists)
-THREE_D_EXTENSIONS = frozenset({'.obj', '.fbx', '.gltf', '.glb'})
+THREE_D_EXTENSIONS = frozenset({'.obj', '.fbx', '.gltf', '.glb', '.usdz'})
+
+
+def has_3d_extension(filename: str) -> bool:
+    lower = filename.lower()
+    return any(lower.endswith(ext) for ext in THREE_D_EXTENSIONS)
+
+
+def normalize_output_item(item):
+    """Normalize a single output list item for the jobs API.
+
+    Returns the normalized item, or None to exclude it.
+    String items with 3D extensions become {filename, type, subfolder} dicts.
+    """
+    if item is None:
+        return None
+    if isinstance(item, str):
+        if has_3d_extension(item):
+            return {'filename': item, 'type': 'output', 'subfolder': '', 'mediaType': '3d'}
+        return None
+    if isinstance(item, dict):
+        return item
+    return None
+
+
+def normalize_outputs(outputs: dict) -> dict:
+    """Normalize raw node outputs for the jobs API.
+
+    Transforms string 3D filenames into file output dicts and removes
+    None items. All other items (non-3D strings, dicts, etc.) are
+    preserved as-is.
+    """
+    normalized = {}
+    for node_id, node_outputs in outputs.items():
+        if not isinstance(node_outputs, dict):
+            normalized[node_id] = node_outputs
+            continue
+        normalized_node = {}
+        for media_type, items in node_outputs.items():
+            if media_type == 'animated' or not isinstance(items, list):
+                normalized_node[media_type] = items
+                continue
+            normalized_items = []
+            for item in items:
+                if item is None:
+                    continue
+                norm = normalize_output_item(item)
+                normalized_items.append(norm if norm is not None else item)
+            normalized_node[media_type] = normalized_items
+        normalized[node_id] = normalized_node
+    return normalized
 
 
 def _extract_job_metadata(extra_data: dict) -> tuple[Optional[int], Optional[str]]:
@@ -45,9 +95,9 @@ def is_previewable(media_type: str, item: dict) -> bool:
     Maintains backwards compatibility with existing logic.
 
     Priority:
-    1. media_type is 'images', 'video', or 'audio'
+    1. media_type is 'images', 'video', 'audio', or '3d'
     2. format field starts with 'video/' or 'audio/'
-    3. filename has a 3D extension (.obj, .fbx, .gltf, .glb)
+    3. filename has a 3D extension (.obj, .fbx, .gltf, .glb, .usdz)
     """
     if media_type in PREVIEWABLE_MEDIA_TYPES:
         return True
@@ -139,7 +189,7 @@ def normalize_history_item(prompt_id: str, history_item: dict, include_outputs:
     })
 
     if include_outputs:
-        job['outputs'] = outputs
+        job['outputs'] = normalize_outputs(outputs)
         job['execution_status'] = status_info
         job['workflow'] = {
             'prompt': prompt,
@@ -171,18 +221,23 @@ def get_outputs_summary(outputs: dict) -> tuple[int, Optional[dict]]:
                 continue
 
             for item in items:
-                count += 1
-
-                if not isinstance(item, dict):
+                normalized = normalize_output_item(item)
+                if normalized is None:
                     continue
 
-                if preview_output is None and is_previewable(media_type, item):
+                count += 1
+
+                if preview_output is not None:
+                    continue
+
+                if isinstance(normalized, dict) and is_previewable(media_type, normalized):
                     enriched = {
-                        **item,
+                        **normalized,
                         'nodeId': node_id,
-                        'mediaType': media_type
                     }
-                    if item.get('type') == 'output':
+                    if 'mediaType' not in normalized:
+                        enriched['mediaType'] = media_type
+                    if normalized.get('type') == 'output':
                         preview_output = enriched
                     elif fallback_preview is None:
                         fallback_preview = enriched
diff --git a/comfy_extras/nodes_ace.py b/comfy_extras/nodes_ace.py
index dde5bbd2a..9cf84ab4d 100644
--- a/comfy_extras/nodes_ace.py
+++ b/comfy_extras/nodes_ace.py
@@ -49,13 +49,14 @@ class TextEncodeAceStepAudio15(io.ComfyNode):
                 io.Float.Input("temperature", default=0.85, min=0.0, max=2.0, step=0.01, advanced=True),
                 io.Float.Input("top_p", default=0.9, min=0.0, max=2000.0, step=0.01, advanced=True),
                 io.Int.Input("top_k", default=0, min=0, max=100, advanced=True),
+                io.Float.Input("min_p", default=0.000, min=0.0, max=1.0, step=0.001, advanced=True),
             ],
             outputs=[io.Conditioning.Output()],
         )
 
     @classmethod
-    def execute(cls, clip, tags, lyrics, seed, bpm, duration, timesignature, language, keyscale, generate_audio_codes, cfg_scale, temperature, top_p, top_k) -> io.NodeOutput:
-        tokens = clip.tokenize(tags, lyrics=lyrics, bpm=bpm, duration=duration, timesignature=int(timesignature), language=language, keyscale=keyscale, seed=seed, generate_audio_codes=generate_audio_codes, cfg_scale=cfg_scale, temperature=temperature, top_p=top_p, top_k=top_k)
+    def execute(cls, clip, tags, lyrics, seed, bpm, duration, timesignature, language, keyscale, generate_audio_codes, cfg_scale, temperature, top_p, top_k, min_p) -> io.NodeOutput:
+        tokens = clip.tokenize(tags, lyrics=lyrics, bpm=bpm, duration=duration, timesignature=int(timesignature), language=language, keyscale=keyscale, seed=seed, generate_audio_codes=generate_audio_codes, cfg_scale=cfg_scale, temperature=temperature, top_p=top_p, top_k=top_k, min_p=min_p)
         conditioning = clip.encode_from_tokens_scheduled(tokens)
         return io.NodeOutput(conditioning)
 
diff --git a/comfy_extras/nodes_lora_extract.py b/comfy_extras/nodes_lora_extract.py
index fb89e03f4..1542d0a88 100644
--- a/comfy_extras/nodes_lora_extract.py
+++ b/comfy_extras/nodes_lora_extract.py
@@ -7,6 +7,7 @@ import logging
 from enum import Enum
 from typing_extensions import override
 from comfy_api.latest import ComfyExtension, io
+from tqdm.auto import trange
 
 CLAMP_QUANTILE = 0.99
 
@@ -49,12 +50,22 @@ LORA_TYPES = {"standard": LORAType.STANDARD,
               "full_diff": LORAType.FULL_DIFF}
 
 def calc_lora_model(model_diff, rank, prefix_model, prefix_lora, output_sd, lora_type, bias_diff=False):
-    comfy.model_management.load_models_gpu([model_diff], force_patch_weights=True)
+    comfy.model_management.load_models_gpu([model_diff])
     sd = model_diff.model_state_dict(filter_prefix=prefix_model)
 
-    for k in sd:
-        if k.endswith(".weight"):
+    sd_keys = list(sd.keys())
+    for index in trange(len(sd_keys), unit="weight"):
+        k = sd_keys[index]
+        op_keys = sd_keys[index].rsplit('.', 1)
+        if len(op_keys) < 2 or op_keys[1] not in ["weight", "bias"] or (op_keys[1] == "bias" and not bias_diff):
+            continue
+        op = comfy.utils.get_attr(model_diff.model, op_keys[0])
+        if hasattr(op, "comfy_cast_weights") and not getattr(op, "comfy_patched_weights", False):
+            weight_diff = model_diff.patch_weight_to_device(k, model_diff.load_device, return_weight=True)
+        else:
             weight_diff = sd[k]
+
+        if op_keys[1] == "weight":
             if lora_type == LORAType.STANDARD:
                 if weight_diff.ndim < 2:
                     if bias_diff:
@@ -69,8 +80,8 @@ def calc_lora_model(model_diff, rank, prefix_model, prefix_lora, output_sd, lora
             elif lora_type == LORAType.FULL_DIFF:
                 output_sd["{}{}.diff".format(prefix_lora, k[len(prefix_model):-7])] = weight_diff.contiguous().half().cpu()
 
-        elif bias_diff and k.endswith(".bias"):
-            output_sd["{}{}.diff_b".format(prefix_lora, k[len(prefix_model):-5])] = sd[k].contiguous().half().cpu()
+        elif bias_diff and op_keys[1] == "bias":
+            output_sd["{}{}.diff_b".format(prefix_lora, k[len(prefix_model):-5])] = weight_diff.contiguous().half().cpu()
     return output_sd
 
 class LoraSave(io.ComfyNode):
diff --git a/comfy_extras/nodes_post_processing.py b/comfy_extras/nodes_post_processing.py
index a52a90e2c..66dac10b1 100644
--- a/comfy_extras/nodes_post_processing.py
+++ b/comfy_extras/nodes_post_processing.py
@@ -655,6 +655,7 @@ class BatchImagesMasksLatentsNode(io.ComfyNode):
             batched = batch_masks(values)
         return io.NodeOutput(batched)
 
+
 class PostProcessingExtension(ComfyExtension):
     @override
     async def get_node_list(self) -> list[type[io.ComfyNode]]:
diff --git a/comfy_extras/nodes_replacements.py b/comfy_extras/nodes_replacements.py
new file mode 100644
index 000000000..7684e854c
--- /dev/null
+++ b/comfy_extras/nodes_replacements.py
@@ -0,0 +1,103 @@
+from comfy_api.latest import ComfyExtension, io, ComfyAPI
+
+api = ComfyAPI()
+
+
+async def register_replacements():
+    """Register all built-in node replacements."""
+    await register_replacements_longeredge()
+    await register_replacements_batchimages()
+    await register_replacements_upscaleimage()
+    await register_replacements_controlnet()
+    await register_replacements_load3d()
+    await register_replacements_preview3d()
+    await register_replacements_svdimg2vid()
+    await register_replacements_conditioningavg()
+
+async def register_replacements_longeredge():
+    # No dynamic inputs here
+    await api.node_replacement.register(io.NodeReplace(
+            new_node_id="ImageScaleToMaxDimension",
+            old_node_id="ResizeImagesByLongerEdge",
+            old_widget_ids=["longer_edge"],
+            input_mapping=[
+                {"new_id": "image", "old_id": "images"},
+                {"new_id": "largest_size", "old_id": "longer_edge"},
+                {"new_id": "upscale_method", "set_value": "lanczos"},
+            ],
+            # just to test the frontend output_mapping code, does nothing really here
+            output_mapping=[{"new_idx": 0, "old_idx": 0}],
+        ))
+
+async def register_replacements_batchimages():
+    # BatchImages node uses Autogrow
+    await api.node_replacement.register(io.NodeReplace(
+            new_node_id="BatchImagesNode",
+            old_node_id="ImageBatch",
+            input_mapping=[
+                {"new_id": "images.image0", "old_id": "image1"},
+                {"new_id": "images.image1", "old_id": "image2"},
+            ],
+        ))
+
+async def register_replacements_upscaleimage():
+    # ResizeImageMaskNode uses DynamicCombo
+    await api.node_replacement.register(io.NodeReplace(
+            new_node_id="ResizeImageMaskNode",
+            old_node_id="ImageScaleBy",
+            old_widget_ids=["upscale_method", "scale_by"],
+            input_mapping=[
+                {"new_id": "input", "old_id": "image"},
+                {"new_id": "resize_type", "set_value": "scale by multiplier"},
+                {"new_id": "resize_type.multiplier", "old_id": "scale_by"},
+                {"new_id": "scale_method", "old_id": "upscale_method"},
+            ],
+        ))
+
+async def register_replacements_controlnet():
+    # T2IAdapterLoader → ControlNetLoader
+    await api.node_replacement.register(io.NodeReplace(
+            new_node_id="ControlNetLoader",
+            old_node_id="T2IAdapterLoader",
+            input_mapping=[
+                {"new_id": "control_net_name", "old_id": "t2i_adapter_name"},
+            ],
+        ))
+
+async def register_replacements_load3d():
+    # Load3DAnimation merged into Load3D
+    await api.node_replacement.register(io.NodeReplace(
+            new_node_id="Load3D",
+            old_node_id="Load3DAnimation",
+        ))
+
+async def register_replacements_preview3d():
+    # Preview3DAnimation merged into Preview3D
+    await api.node_replacement.register(io.NodeReplace(
+            new_node_id="Preview3D",
+            old_node_id="Preview3DAnimation",
+        ))
+
+async def register_replacements_svdimg2vid():
+    # Typo fix: SDV → SVD
+    await api.node_replacement.register(io.NodeReplace(
+            new_node_id="SVD_img2vid_Conditioning",
+            old_node_id="SDV_img2vid_Conditioning",
+        ))
+
+async def register_replacements_conditioningavg():
+    # Typo fix: trailing space in node name
+    await api.node_replacement.register(io.NodeReplace(
+            new_node_id="ConditioningAverage",
+            old_node_id="ConditioningAverage ",
+        ))
+
+class NodeReplacementsExtension(ComfyExtension):
+    async def on_load(self) -> None:
+        await register_replacements()
+
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return []
+
+async def comfy_entrypoint() -> NodeReplacementsExtension:
+    return NodeReplacementsExtension()
diff --git a/comfy_extras/nodes_train.py b/comfy_extras/nodes_train.py
index 024a89391..aa2d88673 100644
--- a/comfy_extras/nodes_train.py
+++ b/comfy_extras/nodes_train.py
@@ -4,6 +4,7 @@ import os
 import numpy as np
 import safetensors
 import torch
+import torch.nn as nn
 import torch.utils.checkpoint
 from tqdm.auto import trange
 from PIL import Image, ImageDraw, ImageFont
@@ -27,6 +28,11 @@ class TrainGuider(comfy_extras.nodes_custom_sampler.Guider_Basic):
     """
     CFGGuider with modifications for training specific logic
     """
+
+    def __init__(self, *args, offloading=False, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.offloading = offloading
+
     def outer_sample(
         self,
         noise,
@@ -45,9 +51,11 @@ class TrainGuider(comfy_extras.nodes_custom_sampler.Guider_Basic):
                 noise.shape,
                 self.conds,
                 self.model_options,
-                force_full_load=True,  # mirror behavior in TrainLoraNode.execute() to keep model loaded
+                force_full_load=not self.offloading,
+                force_offload=self.offloading,
             )
         )
+        torch.cuda.empty_cache()
         device = self.model_patcher.load_device
 
         if denoise_mask is not None:
@@ -404,16 +412,97 @@ def find_all_highest_child_module_with_forward(
     return result
 
 
-def patch(m):
+def find_modules_at_depth(
+    model: nn.Module, depth: int = 1, result=None, current_depth=0, name=None
+) -> list[nn.Module]:
+    """
+    Find modules at a specific depth level for gradient checkpointing.
+
+    Args:
+        model: The model to search
+        depth: Target depth level (1 = top-level blocks, 2 = their children, etc.)
+        result: Accumulator for results
+        current_depth: Current recursion depth
+        name: Current module name for logging
+
+    Returns:
+        List of modules at the target depth
+    """
+    if result is None:
+        result = []
+    name = name or "root"
+
+    # Skip container modules (they don't have meaningful forward)
+    is_container = isinstance(model, (nn.ModuleList, nn.Sequential, nn.ModuleDict))
+    has_forward = hasattr(model, "forward") and not is_container
+
+    if has_forward:
+        current_depth += 1
+        if current_depth == depth:
+            result.append(model)
+            logging.debug(f"Found module at depth {depth}: {name} ({model.__class__.__name__})")
+            return result
+
+    # Recurse into children
+    for next_name, child in model.named_children():
+        find_modules_at_depth(child, depth, result, current_depth, f"{name}.{next_name}")
+
+    return result
+
+
+class OffloadCheckpointFunction(torch.autograd.Function):
+    """
+    Gradient checkpointing that works with weight offloading.
+
+    Forward: no_grad -> compute -> weights can be freed
+    Backward: enable_grad -> recompute -> backward -> weights can be freed
+
+    For single input, single output modules (Linear, Conv*).
+    """
+
+    @staticmethod
+    def forward(ctx, x: torch.Tensor, forward_fn):
+        ctx.save_for_backward(x)
+        ctx.forward_fn = forward_fn
+        with torch.no_grad():
+            return forward_fn(x)
+
+    @staticmethod
+    def backward(ctx, grad_out: torch.Tensor):
+        x, = ctx.saved_tensors
+        forward_fn = ctx.forward_fn
+
+        # Clear context early
+        ctx.forward_fn = None
+
+        with torch.enable_grad():
+            x_detached = x.detach().requires_grad_(True)
+            y = forward_fn(x_detached)
+            y.backward(grad_out)
+            grad_x = x_detached.grad
+
+        # Explicit cleanup
+        del y, x_detached, forward_fn
+
+        return grad_x, None
+
+
+def patch(m, offloading=False):
     if not hasattr(m, "forward"):
         return
     org_forward = m.forward
 
-    def fwd(args, kwargs):
-        return org_forward(*args, **kwargs)
+    # Branch 1: Linear/Conv* -> offload-compatible checkpoint (single input/output)
+    if offloading and isinstance(m, (nn.Linear, nn.Conv1d, nn.Conv2d, nn.Conv3d)):
+        def checkpointing_fwd(x):
+            return OffloadCheckpointFunction.apply(x, org_forward)
+    # Branch 2: Others -> standard checkpoint
+    else:
+        def fwd(args, kwargs):
+            return org_forward(*args, **kwargs)
 
-    def checkpointing_fwd(*args, **kwargs):
-        return torch.utils.checkpoint.checkpoint(fwd, args, kwargs, use_reentrant=False)
+        def checkpointing_fwd(*args, **kwargs):
+            return torch.utils.checkpoint.checkpoint(fwd, args, kwargs, use_reentrant=False)
 
     m.org_forward = org_forward
     m.forward = checkpointing_fwd
@@ -936,6 +1025,18 @@ class TrainLoraNode(io.ComfyNode):
                     default=True,
                     tooltip="Use gradient checkpointing for training.",
                 ),
+                io.Int.Input(
+                    "checkpoint_depth",
+                    default=1,
+                    min=1,
+                    max=5,
+                    tooltip="Depth level for gradient checkpointing.",
+                ),
+                io.Boolean.Input(
+                    "offloading",
+                    default=False,
+                    tooltip="Offload the Model to RAM. Requires Bypass Mode.",
+                ),
                 io.Combo.Input(
                     "existing_lora",
                     options=folder_paths.get_filename_list("loras") + ["[None]"],
@@ -982,6 +1083,8 @@ class TrainLoraNode(io.ComfyNode):
         lora_dtype,
         algorithm,
         gradient_checkpointing,
+        checkpoint_depth,
+        offloading,
         existing_lora,
         bucket_mode,
         bypass_mode,
@@ -1000,6 +1103,8 @@ class TrainLoraNode(io.ComfyNode):
         lora_dtype = lora_dtype[0]
         algorithm = algorithm[0]
         gradient_checkpointing = gradient_checkpointing[0]
+        offloading = offloading[0]
+        checkpoint_depth = checkpoint_depth[0]
         existing_lora = existing_lora[0]
         bucket_mode = bucket_mode[0]
         bypass_mode = bypass_mode[0]
@@ -1019,6 +1124,15 @@ class TrainLoraNode(io.ComfyNode):
         lora_dtype = node_helpers.string_to_torch_dtype(lora_dtype)
         mp.set_model_compute_dtype(dtype)
 
+        if mp.is_dynamic():
+            if not bypass_mode:
+                logging.info("Training MP is Dynamic - forcing bypass mode. Start comfy with --highvram to force weight diff mode")
+                bypass_mode = True
+            offloading = True
+        elif offloading:
+            if not bypass_mode:
+                logging.info("Training Offload selected - forcing bypass mode. Set bypass = True to remove this message")
+
         # Prepare latents and compute counts
         latents, num_images, multi_res = _prepare_latents_and_count(
             latents, dtype, bucket_mode
@@ -1054,16 +1168,18 @@ class TrainLoraNode(io.ComfyNode):
 
             # Setup gradient checkpointing
             if gradient_checkpointing:
-                for m in find_all_highest_child_module_with_forward(
-                    mp.model.diffusion_model
-                ):
-                    patch(m)
+                modules_to_patch = find_modules_at_depth(
+                    mp.model.diffusion_model, depth=checkpoint_depth
+                )
+                logging.info(f"Gradient checkpointing: patching {len(modules_to_patch)} modules at depth {checkpoint_depth}")
+                for m in modules_to_patch:
+                    patch(m, offloading=offloading)
 
             torch.cuda.empty_cache()
             # With force_full_load=False we should be able to have offloading
             # But for offloading in training we need custom AutoGrad hooks for fwd/bwd
             comfy.model_management.load_models_gpu(
-                [mp], memory_required=1e20, force_full_load=True
+                [mp], memory_required=1e20, force_full_load=not offloading
             )
             torch.cuda.empty_cache()
 
@@ -1100,7 +1216,7 @@ class TrainLoraNode(io.ComfyNode):
                 )
 
             # Setup guider
-            guider = TrainGuider(mp)
+            guider = TrainGuider(mp, offloading=offloading)
             guider.set_conds(positive)
 
             # Inject bypass hooks if bypass mode is enabled
@@ -1113,6 +1229,7 @@ class TrainLoraNode(io.ComfyNode):
 
             # Run training loop
             try:
+                comfy.model_management.in_training = True
                 _run_training_loop(
                     guider,
                     train_sampler,
@@ -1123,6 +1240,7 @@ class TrainLoraNode(io.ComfyNode):
                     multi_res,
                 )
             finally:
+                comfy.model_management.in_training = False
                 # Eject bypass hooks if they were injected
                 if bypass_injections is not None:
                     for injection in bypass_injections:
@@ -1132,19 +1250,20 @@ class TrainLoraNode(io.ComfyNode):
                     unpatch(m)
             del train_sampler, optimizer
 
-            # Finalize adapters
+            for param in lora_sd:
+                lora_sd[param] = lora_sd[param].to(lora_dtype).detach()
+
             for adapter in all_weight_adapters:
                 adapter.requires_grad_(False)
-
-            for param in lora_sd:
-                lora_sd[param] = lora_sd[param].to(lora_dtype)
+                del adapter
+            del all_weight_adapters
 
             # mp in train node is highly specialized for training
             # use it in inference will result in bad behavior so we don't return it
             return io.NodeOutput(lora_sd, loss_map, steps + existing_steps)
 
 
-class LoraModelLoader(io.ComfyNode):#
+class LoraModelLoader(io.ComfyNode):
     @classmethod
     def define_schema(cls):
         return io.Schema(
@@ -1166,6 +1285,11 @@ class LoraModelLoader(io.ComfyNode):#
                     max=100.0,
                     tooltip="How strongly to modify the diffusion model. This value can be negative.",
                 ),
+                io.Boolean.Input(
+                    "bypass",
+                    default=False,
+                    tooltip="When enabled, applies LoRA in bypass mode without modifying base model weights. Useful for training and when model weights are offloaded.",
+                ),
             ],
             outputs=[
                 io.Model.Output(
@@ -1175,13 +1299,18 @@ class LoraModelLoader(io.ComfyNode):#
         )
 
     @classmethod
-    def execute(cls, model, lora, strength_model):
+    def execute(cls, model, lora, strength_model, bypass=False):
         if strength_model == 0:
             return io.NodeOutput(model)
 
-        model_lora, _ = comfy.sd.load_lora_for_models(
-            model, None, lora, strength_model, 0
-        )
+        if bypass:
+            model_lora, _ = comfy.sd.load_bypass_lora_for_models(
+                model, None, lora, strength_model, 0
+            )
+        else:
+            model_lora, _ = comfy.sd.load_lora_for_models(
+                model, None, lora, strength_model, 0
+            )
         return io.NodeOutput(model_lora)
 
 
diff --git a/comfy_extras/nodes_video.py b/comfy_extras/nodes_video.py
index ccf7b63d3..cd765a7c1 100644
--- a/comfy_extras/nodes_video.py
+++ b/comfy_extras/nodes_video.py
@@ -202,6 +202,56 @@ class LoadVideo(io.ComfyNode):
 
         return True
 
+class VideoSlice(io.ComfyNode):
+    @classmethod
+    def define_schema(cls):
+        return io.Schema(
+            node_id="Video Slice",
+            display_name="Video Slice",
+            search_aliases=[
+                "trim video duration",
+                "skip first frames",
+                "frame load cap",
+                "start time",
+            ],
+            category="image/video",
+            inputs=[
+                io.Video.Input("video"),
+                io.Float.Input(
+                    "start_time",
+                    default=0.0,
+                    max=1e5,
+                    min=-1e5,
+                    step=0.001,
+                    tooltip="Start time in seconds",
+                ),
+                io.Float.Input(
+                    "duration",
+                    default=0.0,
+                    min=0.0,
+                    step=0.001,
+                    tooltip="Duration in seconds, or 0 for unlimited duration",
+                ),
+                io.Boolean.Input(
+                    "strict_duration",
+                    default=False,
+                    tooltip="If True, when the specified duration is not possible, an error will be raised.",
+                ),
+            ],
+            outputs=[
+                io.Video.Output(),
+            ],
+        )
+
+    @classmethod
+    def execute(cls, video: io.Video.Type, start_time: float, duration: float, strict_duration: bool) -> io.NodeOutput:
+        trimmed = video.as_trimmed(start_time, duration, strict_duration=strict_duration)
+        if trimmed is not None:
+            return io.NodeOutput(trimmed)
+        raise ValueError(
+            f"Failed to slice video:\nSource duration: {video.get_duration()}\nStart time: {start_time}\nTarget duration: {duration}"
+        )
+
 
 class VideoExtension(ComfyExtension):
     @override
@@ -212,6 +262,7 @@ class VideoExtension(ComfyExtension):
             CreateVideo,
             GetVideoComponents,
             LoadVideo,
+            VideoSlice,
         ]
 
 async def comfy_entrypoint() -> VideoExtension:
diff --git a/comfyui_version.py b/comfyui_version.py
index 706b37763..cf4e89816 100644
--- a/comfyui_version.py
+++ b/comfyui_version.py
@@ -1,3 +1,3 @@
 # This file is automatically generated by the build process when version is
 # updated in pyproject.toml.
-__version__ = "0.12.3"
+__version__ = "0.13.0"
diff --git a/execution.py b/execution.py
index 3dbab82e6..f549a2f0f 100644
--- a/execution.py
+++ b/execution.py
@@ -13,8 +13,11 @@ from contextlib import nullcontext
 
 import torch
 
+from comfy.cli_args import args
 import comfy.memory_management
 import comfy.model_management
+import comfy_aimdo.model_vbar
+
 from latent_preview import set_preview_method
 import nodes
 from comfy_execution.caching import (
@@ -527,8 +530,10 @@ async def execute(server, dynprompt, caches, current_item, extra_data, executed,
                     output_data, output_ui, has_subgraph, has_pending_tasks = await get_output_data(prompt_id, unique_id, obj, input_data_all, execution_block_cb=execution_block_cb, pre_execute_cb=pre_execute_cb, v3_data=v3_data)
                 finally:
                     if allocator is not None:
+                        if args.verbose == "DEBUG":
+                            comfy_aimdo.model_vbar.vbars_analyze()
                         comfy.model_management.reset_cast_buffers()
-                        torch.cuda.synchronize()
+                        comfy_aimdo.model_vbar.vbars_reset_watermark_limits()
 
             if has_pending_tasks:
                 pending_async_nodes[unique_id] = output_data
@@ -618,6 +623,8 @@ async def execute(server, dynprompt, caches, current_item, extra_data, executed,
             logging.info("Memory summary: {}".format(comfy.model_management.debug_memory_summary()))
             logging.error("Got an OOM, unloading all loaded models.")
             comfy.model_management.unload_all_models()
+        elif isinstance(ex, RuntimeError) and ("mat1 and mat2 shapes" in str(ex)) and "Sampler" in class_type:
+            tips = "\n\nTIPS: If you have any \"Load CLIP\" or \"*CLIP Loader\" nodes in your workflow connected to this sampler node make sure the correct file(s) and type is selected."
 
         error_details = {
             "node_id": real_node_id,
diff --git a/nodes.py b/nodes.py
index 91de7a9d7..db5f98408 100644
--- a/nodes.py
+++ b/nodes.py
@@ -2264,6 +2264,7 @@ async def load_custom_node(module_path: str, ignore=set(), module_parent="custom
                 if not isinstance(extension, ComfyExtension):
                     logging.warning(f"comfy_entrypoint in {module_path} did not return a ComfyExtension, skipping.")
                     return False
+                await extension.on_load()
                 node_list = await extension.get_node_list()
                 if not isinstance(node_list, list):
                     logging.warning(f"comfy_entrypoint in {module_path} did not return a list of nodes, skipping.")
@@ -2435,6 +2436,7 @@ async def init_builtin_extra_nodes():
         "nodes_lora_debug.py",
         "nodes_color.py",
         "nodes_toolkit.py",
+        "nodes_replacements.py",
     ]
 
     import_failed = []
diff --git a/pyproject.toml b/pyproject.toml
index f7925b92a..9dab9a50c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "ComfyUI"
-version = "0.12.3"
+version = "0.13.0"
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.10"
diff --git a/requirements.txt b/requirements.txt
index 5e34a2a49..e939e486a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
-comfyui-frontend-package==1.38.13
-comfyui-workflow-templates==0.8.31
+comfyui-frontend-package==1.38.14
+comfyui-workflow-templates==0.8.38
 comfyui-embedded-docs==0.4.1
 torch
 torchsde
@@ -22,7 +22,7 @@ alembic
 SQLAlchemy
 av>=14.2.0
 comfy-kitchen>=0.2.7
-comfy-aimdo>=0.1.7
+comfy-aimdo>=0.1.8
 requests
 
 #non essential dependencies:
diff --git a/server.py b/server.py
index 2300393b2..8882e43c4 100644
--- a/server.py
+++ b/server.py
@@ -40,6 +40,7 @@ from app.user_manager import UserManager
 from app.model_manager import ModelFileManager
 from app.custom_node_manager import CustomNodeManager
 from app.subgraph_manager import SubgraphManager
+from app.node_replace_manager import NodeReplaceManager
 from typing import Optional, Union
 from api_server.routes.internal.internal_routes import InternalRoutes
 from protocol import BinaryEventTypes
@@ -204,6 +205,7 @@ class PromptServer():
         self.model_file_manager = ModelFileManager()
         self.custom_node_manager = CustomNodeManager()
         self.subgraph_manager = SubgraphManager()
+        self.node_replace_manager = NodeReplaceManager()
         self.internal_routes = InternalRoutes(self)
         self.supports = ["custom_nodes_from_web"]
         self.prompt_queue = execution.PromptQueue(self)
@@ -887,6 +889,8 @@ class PromptServer():
                 if "partial_execution_targets" in json_data:
                     partial_execution_targets = json_data["partial_execution_targets"]
 
+                self.node_replace_manager.apply_replacements(prompt)
+
                 valid = await execution.validate_prompt(prompt_id, prompt, partial_execution_targets)
                 extra_data = {}
                 if "extra_data" in json_data:
@@ -995,6 +999,7 @@ class PromptServer():
         self.model_file_manager.add_routes(self.routes)
         self.custom_node_manager.add_routes(self.routes, self.app, nodes.LOADED_MODULE_DIRS.items())
         self.subgraph_manager.add_routes(self.routes, nodes.LOADED_MODULE_DIRS.items())
+        self.node_replace_manager.add_routes(self.routes)
         self.app.add_subapp('/internal', self.internal_routes.get_app())
 
         # Prefix every route with /api for easier matching for delegation.
diff --git a/tests/execution/test_jobs.py b/tests/execution/test_jobs.py
index 4d2f9ed36..83c36fe48 100644
--- a/tests/execution/test_jobs.py
+++ b/tests/execution/test_jobs.py
@@ -5,8 +5,11 @@ from comfy_execution.jobs import (
     is_previewable,
     normalize_queue_item,
     normalize_history_item,
+    normalize_output_item,
+    normalize_outputs,
     get_outputs_summary,
     apply_sorting,
+    has_3d_extension,
 )
 
 
@@ -35,8 +38,8 @@ class TestIsPreviewable:
     """Unit tests for is_previewable()"""
 
     def test_previewable_media_types(self):
-        """Images, video, audio media types should be previewable."""
-        for media_type in ['images', 'video', 'audio']:
+        """Images, video, audio, 3d media types should be previewable."""
+        for media_type in ['images', 'video', 'audio', '3d']:
             assert is_previewable(media_type, {}) is True
 
     def test_non_previewable_media_types(self):
@@ -46,7 +49,7 @@ class TestIsPreviewable:
 
     def test_3d_extensions_previewable(self):
         """3D file extensions should be previewable regardless of media_type."""
-        for ext in ['.obj', '.fbx', '.gltf', '.glb']:
+        for ext in ['.obj', '.fbx', '.gltf', '.glb', '.usdz']:
             item = {'filename': f'model{ext}'}
             assert is_previewable('files', item) is True
 
@@ -160,7 +163,7 @@ class TestGetOutputsSummary:
 
     def test_3d_files_previewable(self):
         """3D file extensions should be previewable."""
-        for ext in ['.obj', '.fbx', '.gltf', '.glb']:
+        for ext in ['.obj', '.fbx', '.gltf', '.glb', '.usdz']:
             outputs = {
                 'node1': {
                     'files': [{'filename': f'model{ext}', 'type': 'output'}]
@@ -192,6 +195,64 @@ class TestGetOutputsSummary:
         assert preview['mediaType'] == 'images'
         assert preview['subfolder'] == 'outputs'
 
+    def test_string_3d_filename_creates_preview(self):
+        """String items with 3D extensions should synthesize a preview (Preview3D node output).
+        Only the .glb counts — nulls and non-file strings are excluded."""
+        outputs = {
+            'node1': {
+                'result': ['preview3d_abc123.glb', None, None]
+            }
+        }
+        count, preview = get_outputs_summary(outputs)
+        assert count == 1
+        assert preview is not None
+        assert preview['filename'] == 'preview3d_abc123.glb'
+        assert preview['mediaType'] == '3d'
+        assert preview['nodeId'] == 'node1'
+        assert preview['type'] == 'output'
+
+    def test_string_non_3d_filename_no_preview(self):
+        """String items without 3D extensions should not create a preview."""
+        outputs = {
+            'node1': {
+                'result': ['data.json', None]
+            }
+        }
+        count, preview = get_outputs_summary(outputs)
+        assert count == 0
+        assert preview is None
+
+    def test_string_3d_filename_used_as_fallback(self):
+        """String 3D preview should be used when no dict items are previewable."""
+        outputs = {
+            'node1': {
+                'latents': [{'filename': 'latent.safetensors'}],
+            },
+            'node2': {
+                'result': ['model.glb', None]
+            }
+        }
+        count, preview = get_outputs_summary(outputs)
+        assert preview is not None
+        assert preview['filename'] == 'model.glb'
+        assert preview['mediaType'] == '3d'
+
+
+class TestHas3DExtension:
+    """Unit tests for has_3d_extension()"""
+
+    def test_recognized_extensions(self):
+        for ext in ['.obj', '.fbx', '.gltf', '.glb', '.usdz']:
+            assert has_3d_extension(f'model{ext}') is True
+
+    def test_case_insensitive(self):
+        assert has_3d_extension('MODEL.GLB') is True
+        assert has_3d_extension('Scene.GLTF') is True
+
+    def test_non_3d_extensions(self):
+        for name in ['photo.png', 'video.mp4', 'data.json', 'model']:
+            assert has_3d_extension(name) is False
+
 
 class TestApplySorting:
     """Unit tests for apply_sorting()"""
@@ -395,3 +456,142 @@ class TestNormalizeHistoryItem:
             'prompt': {'nodes': {'1': {}}},
             'extra_data': {'create_time': 1234567890, 'client_id': 'abc'},
         }
+
+    def test_include_outputs_normalizes_3d_strings(self):
+        """Detail view should transform string 3D filenames into file output dicts."""
+        history_item = {
+            'prompt': (
+                5,
+                'prompt-3d',
+                {'nodes': {}},
+                {'create_time': 1234567890},
+                ['node1'],
+            ),
+            'status': {'status_str': 'success', 'completed': True, 'messages': []},
+            'outputs': {
+                'node1': {
+                    'result': ['preview3d_abc123.glb', None, None]
+                }
+            },
+        }
+        job = normalize_history_item('prompt-3d', history_item, include_outputs=True)
+
+        assert job['outputs_count'] == 1
+        result_items = job['outputs']['node1']['result']
+        assert len(result_items) == 1
+        assert result_items[0] == {
+            'filename': 'preview3d_abc123.glb',
+            'type': 'output',
+            'subfolder': '',
+            'mediaType': '3d',
+        }
+
+    def test_include_outputs_preserves_dict_items(self):
+        """Detail view normalization should pass dict items through unchanged."""
+        history_item = {
+            'prompt': (
+                5,
+                'prompt-img',
+                {'nodes': {}},
+                {'create_time': 1234567890},
+                ['node1'],
+            ),
+            'status': {'status_str': 'success', 'completed': True, 'messages': []},
+            'outputs': {
+                'node1': {
+                    'images': [
+                        {'filename': 'photo.png', 'type': 'output', 'subfolder': ''},
+                    ]
+                }
+            },
+        }
+        job = normalize_history_item('prompt-img', history_item, include_outputs=True)
+
+        assert job['outputs_count'] == 1
+        assert job['outputs']['node1']['images'] == [
+            {'filename': 'photo.png', 'type': 'output', 'subfolder': ''},
+        ]
+
+
+class TestNormalizeOutputItem:
+    """Unit tests for normalize_output_item()"""
+
+    def test_none_returns_none(self):
+        assert normalize_output_item(None) is None
+
+    def test_string_3d_extension_synthesizes_dict(self):
+        result = normalize_output_item('model.glb')
+        assert result == {'filename': 'model.glb', 'type': 'output', 'subfolder': '', 'mediaType': '3d'}
+
+    def test_string_non_3d_extension_returns_none(self):
+        assert normalize_output_item('data.json') is None
+
+    def test_string_no_extension_returns_none(self):
+        assert normalize_output_item('camera_info_string') is None
+
+    def test_dict_passes_through(self):
+        item = {'filename': 'test.png', 'type': 'output'}
+        assert normalize_output_item(item) is item
+
+    def test_other_types_return_none(self):
+        assert normalize_output_item(42) is None
+        assert normalize_output_item(True) is None
+
+
+class TestNormalizeOutputs:
+    """Unit tests for normalize_outputs()"""
+
+    def test_empty_outputs(self):
+        assert normalize_outputs({}) == {}
+
+    def test_dict_items_pass_through(self):
+        outputs = {
+            'node1': {
+                'images': [{'filename': 'a.png', 'type': 'output'}],
+            }
+        }
+        result = normalize_outputs(outputs)
+        assert result == outputs
+
+    def test_3d_string_synthesized(self):
+        outputs = {
+            'node1': {
+                'result': ['model.glb', None, None],
+            }
+        }
+        result = normalize_outputs(outputs)
+        assert result == {
+            'node1': {
+                'result': [
+                    {'filename': 'model.glb', 'type': 'output', 'subfolder': '', 'mediaType': '3d'},
+                ],
+            }
+        }
+
+    def test_animated_key_preserved(self):
+        outputs = {
+            'node1': {
+                'images': [{'filename': 'a.png', 'type': 'output'}],
+                'animated': [True],
+            }
+        }
+        result = normalize_outputs(outputs)
+        assert result['node1']['animated'] == [True]
+
+    def test_non_dict_node_outputs_preserved(self):
+        outputs = {'node1': 'unexpected_value'}
+        result = normalize_outputs(outputs)
+        assert result == {'node1': 'unexpected_value'}
+
+    def test_none_items_filtered_but_other_types_preserved(self):
+        outputs = {
+            'node1': {
+                'result': ['data.json', None, [1, 2, 3]],
+            }
+        }
+        result = normalize_outputs(outputs)
+        assert result == {
+            'node1': {
+                'result': ['data.json', [1, 2, 3]],
+            }
+        }