diff --git a/comfy_extras/nodes_seedvr.py b/comfy_extras/nodes_seedvr.py
new file mode 100644
index 000000000..6bc2de17f
--- /dev/null
+++ b/comfy_extras/nodes_seedvr.py
@@ -0,0 +1,1164 @@
+from typing_extensions import override
+from comfy_api.latest import ComfyExtension, io
+import torch
+import math
+import logging
+from einops import rearrange
+
+import gc
+import comfy.model_management
+import comfy.sample
+import comfy.samplers
+from comfy.ldm.seedvr.vae import (
+    adain_color_transfer,
+    lab_color_transfer,
+    wavelet_color_transfer,
+)
+
+from torchvision.transforms import functional as TVF
+from torchvision.transforms import Lambda
+from torchvision.transforms.functional import InterpolationMode
+
+
+_SEEDVR2_INVALID_MODEL_MSG_PREFIX = (
+    "SeedVR2Conditioning: model object does not match expected SeedVR2 structure"
+)
+LAB_SCALE_MULTIPLIER = 13
+WAVELET_SCALE_MULTIPLIER = 10
+ADAIN_SCALE_MULTIPLIER = 6
+COLOR_CORRECTION_MEMORY_HEADROOM = 0.75
+
+# Private sentinel for getattr default: distinguishes "attribute missing"
+# from "attribute present but None" so the failure message is accurate.
+_ATTR_MISSING = object()
+
+
+def _seedvr2_auto_chunk_attempts(t_latent, t_pixel, frames_per_chunk):
+    """Return stricter 4n+1 frame chunk sizes for auto OOM retries."""
+    attempts = [frames_per_chunk]
+    current_chunk_latent = (
+        t_latent if t_pixel <= frames_per_chunk
+        else (frames_per_chunk - 1) // 4 + 1
+    )
+    current_chunk_count = max(1, math.ceil(t_latent / current_chunk_latent))
+    seen = {frames_per_chunk}
+
+    for target_chunks in range(max(2, current_chunk_count + 1), t_latent + 1):
+        chunk_latent = max(1, math.ceil(t_latent / target_chunks))
+        candidate = 4 * (chunk_latent - 1) + 1
+        if candidate in seen:
+            continue
+        if candidate >= attempts[-1]:
+            continue
+        attempts.append(candidate)
+        seen.add(candidate)
+
+    return attempts
+
+
+def _resolve_seedvr2_diffusion_model(model):
+    """Resolve the inner SeedVR2 diffusion-model module from a ComfyUI model
+    patcher object. Fails loud with a ``RuntimeError`` whose message begins
+    with ``_SEEDVR2_INVALID_MODEL_MSG_PREFIX`` when the expected wrapper
+    shape (``model.model.diffusion_model``) is absent.
+
+    Distinguishes four failure modes via the ``_ATTR_MISSING`` sentinel:
+    ``model.model`` missing, ``model.model is None``,
+    ``model.model.diffusion_model`` missing, ``model.model.diffusion_model
+    is None``. Each mode produces an accurate error message rather than
+    conflating "attribute missing" with "attribute is None".
+    """
+    inner = getattr(model, "model", _ATTR_MISSING)
+    if inner is _ATTR_MISSING:
+        raise RuntimeError(
+            f"{_SEEDVR2_INVALID_MODEL_MSG_PREFIX}: input has no 'model' attribute "
+            f"(got type {type(model).__name__})."
+        )
+    if inner is None:
+        raise RuntimeError(
+            f"{_SEEDVR2_INVALID_MODEL_MSG_PREFIX}: input.model is None "
+            f"(input type {type(model).__name__})."
+        )
+    diffusion_model = getattr(inner, "diffusion_model", _ATTR_MISSING)
+    if diffusion_model is _ATTR_MISSING:
+        raise RuntimeError(
+            f"{_SEEDVR2_INVALID_MODEL_MSG_PREFIX}: 'model.model' has no "
+            f"'diffusion_model' attribute (got type {type(inner).__name__})."
+        )
+    if diffusion_model is None:
+        raise RuntimeError(
+            f"{_SEEDVR2_INVALID_MODEL_MSG_PREFIX}: 'model.model.diffusion_model' "
+            f"is None (model.model type {type(inner).__name__})."
+        )
+    return diffusion_model
+
+
+def _apply_rope_freqs_float32_cast(diffusion_model):
+    """Cast every nested module's ``rope.freqs`` parameter data to ``float32``
+    when it is not already in float32. Idempotency is per-tensor by dtype
+    check, NOT a per-instance sentinel attribute — a sentinel would survive
+    Comfy's dynamic model unload/reload cycle while ``rope.freqs`` itself
+    is restored from the archived dtype, leaving RoPE running in fp16/bf16
+    on subsequent calls. The dtype check makes the cast self-correcting
+    against weight-restore lifecycle events. Iteration cost is one walk of
+    the diffusion-model module tree per ``execute()`` call (microseconds).
+    """
+    for module in diffusion_model.modules():
+        if hasattr(module, 'rope') and hasattr(module.rope, 'freqs'):
+            if module.rope.freqs.data.dtype != torch.float32:
+                module.rope.freqs.data = module.rope.freqs.data.to(torch.float32)
+
+
+def clear_vae_memory(vae_model):
+    for module in vae_model.modules():
+        if hasattr(module, "memory"):
+            module.memory = None
+    gc.collect()
+    comfy.model_management.soft_empty_cache()
+
+def expand_dims(tensor, ndim):
+    shape = tensor.shape + (1,) * (ndim - tensor.ndim)
+    return tensor.reshape(shape)
+
+def get_conditions(latent, latent_blur):
+    t, h, w, c = latent.shape
+    cond = torch.ones([t, h, w, c + 1], device=latent.device, dtype=latent.dtype)
+    cond[:, ..., :-1] = latent_blur[:]
+    cond[:, ..., -1:] = 1.0
+    return cond
+
+def timestep_transform(timesteps, latents_shapes):
+    vt = 4
+    vs = 8
+    frames = (latents_shapes[:, 0] - 1) * vt + 1
+    heights = latents_shapes[:, 1] * vs
+    widths = latents_shapes[:, 2] * vs
+
+    # Compute shift factor.
+    def get_lin_function(x1, y1, x2, y2):
+        m = (y2 - y1) / (x2 - x1)
+        b = y1 - m * x1
+        return lambda x: m * x + b
+
+    img_shift_fn = get_lin_function(x1=256 * 256, y1=1.0, x2=1024 * 1024, y2=3.2)
+    vid_shift_fn = get_lin_function(x1=256 * 256 * 37, y1=1.0, x2=1280 * 720 * 145, y2=5.0)
+    shift = torch.where(
+        frames > 1,
+        vid_shift_fn(heights * widths * frames),
+        img_shift_fn(heights * widths),
+    ).to(timesteps.device)
+
+    # Shift timesteps.
+    T = 1000.0
+    timesteps = timesteps / T
+    timesteps = shift * timesteps / (1 + (shift - 1) * timesteps)
+    timesteps = timesteps * T
+    return timesteps
+
+def inter(x_0, x_T, t):
+    t = expand_dims(t, x_0.ndim)
+    T = 1000.0
+    B = lambda t: t / T
+    A = lambda t: 1 - (t / T)
+    return A(t) * x_0 + B(t) * x_T
+def area_resize(image, max_area):
+
+    height, width = image.shape[-2:]
+    scale = math.sqrt(max_area / (height * width))
+
+    resized_height, resized_width = round(height * scale), round(width * scale)
+
+    return TVF.resize(
+        image,
+        size=(resized_height, resized_width),
+        interpolation=InterpolationMode.BICUBIC,
+    )
+
+def div_pad(image, factor):
+
+    height_factor, width_factor = factor
+    height, width = image.shape[-2:]
+
+    pad_height = (height_factor - (height % height_factor)) % height_factor
+    pad_width = (width_factor - (width % width_factor)) % width_factor
+
+    if pad_height == 0 and pad_width == 0:
+        return image
+
+    if isinstance(image, torch.Tensor):
+        padding = (0, pad_width, 0, pad_height)
+        image = torch.nn.functional.pad(image, padding, mode='constant', value=0.0)
+
+    return image
+
+def cut_videos(videos):
+    t = videos.size(1)
+    if t == 1:
+        return videos
+    if t <= 4 :
+        padding = [videos[:, -1].unsqueeze(1)] * (4 - t + 1)
+        padding = torch.cat(padding, dim=1)
+        videos = torch.cat([videos, padding], dim=1)
+        return videos
+    if (t - 1) % (4) == 0:
+        return videos
+    else:
+        padding = [videos[:, -1].unsqueeze(1)] * (
+            4 - ((t - 1) % (4))
+        )
+        padding = torch.cat(padding, dim=1)
+        videos = torch.cat([videos, padding], dim=1)
+        assert (videos.size(1) - 1) % (4) == 0
+        return videos
+
+def side_resize(image, size):
+    antialias = not (isinstance(image, torch.Tensor) and image.device.type == 'mps')
+    resized = TVF.resize(image, size, InterpolationMode.BICUBIC, antialias=antialias)
+    return resized
+
+
+def _seedvr2_input_shorter_edge(images, node_name):
+    if images.dim() == 4:
+        return min(images.shape[1], images.shape[2])
+    if images.dim() == 5:
+        return min(images.shape[2], images.shape[3])
+    raise ValueError(
+        f"{node_name}: expected 4-D or 5-D IMAGE tensor, "
+        f"got shape {tuple(images.shape)}"
+    )
+
+
+def _seedvr2_resize_and_pad(images, upscaled_shorter_edge, node_name):
+    if upscaled_shorter_edge < 2:
+        raise ValueError(
+            f"{node_name}: resolved upscaled_shorter_edge must be at least 2 pixels; "
+            f"got {upscaled_shorter_edge}."
+        )
+    original_image = images
+    if images.dim() == 4:
+        # Comfy video components arrive as a 4-D IMAGE frame sequence:
+        # (frames, H, W, C). SeedVR2 consumes that as one video.
+        images = images.unsqueeze(0)
+    elif images.dim() != 5:
+        raise ValueError(
+            f"{node_name}: expected 4-D or 5-D IMAGE tensor, "
+            f"got shape {tuple(images.shape)}"
+        )
+    images = images.permute(0, 1, 4, 2, 3)
+
+    b, t, c, h, w = images.shape
+    images = images.reshape(b * t, c, h, w)
+
+    clip = Lambda(lambda x: torch.clamp(x, 0.0, 1.0))
+    images = side_resize(images, upscaled_shorter_edge)
+
+    images = clip(images)
+    images = div_pad(images, (16, 16))
+    _, _, new_h, new_w = images.shape
+
+    images = images.reshape(b, t, c, new_h, new_w)
+    images = cut_videos(images)
+    images_bthwc = rearrange(images, "b t c h w -> b t h w c")
+
+    return io.NodeOutput(images_bthwc, original_image, upscaled_shorter_edge)
+
+
+class SeedVR2Resize(io.ComfyNode):
+    @classmethod
+    def define_schema(cls):
+        return io.Schema(
+            node_id="SeedVR2Resize",
+            category="image/video",
+            inputs=[
+                io.Image.Input("images"),
+                io.Float.Input("multiplier", default=4.0, min=0.01),
+            ],
+            outputs=[
+                io.Image.Output("input_pixels"),
+                io.Image.Output("original_image"),
+                io.Int.Output("upscaled_shorter_edge"),
+            ]
+        )
+
+    @classmethod
+    def execute(cls, images, multiplier=4.0):
+        if multiplier <= 0:
+            raise ValueError(
+                f"SeedVR2Resize: multiplier must be > 0; got {multiplier}."
+            )
+        shorter_edge = _seedvr2_input_shorter_edge(images, "SeedVR2Resize")
+        upscaled_shorter_edge = int(round(shorter_edge * multiplier))
+        if upscaled_shorter_edge < 2:
+            raise ValueError(
+                "SeedVR2Resize: multiplier resolved upscaled_shorter_edge "
+                f"to {upscaled_shorter_edge}; use a multiplier that resolves "
+                "to at least 2 pixels."
+            )
+        return _seedvr2_resize_and_pad(
+            images, upscaled_shorter_edge, "SeedVR2Resize",
+        )
+
+
+class SeedVR2ResizeAdvanced(io.ComfyNode):
+    @classmethod
+    def define_schema(cls):
+        return io.Schema(
+            node_id="SeedVR2ResizeAdvanced",
+            category="image/video",
+            inputs=[
+                io.Image.Input("images"),
+                io.Int.Input("shorter_edge", default=1280, min=2),
+            ],
+            outputs=[
+                io.Image.Output("input_pixels"),
+                io.Image.Output("original_image"),
+                io.Int.Output("upscaled_shorter_edge"),
+            ]
+        )
+
+    @classmethod
+    def execute(cls, images, shorter_edge):
+        return _seedvr2_resize_and_pad(
+            images, shorter_edge, "SeedVR2ResizeAdvanced",
+        )
+
+
+class SeedVR2PostProcessing(io.ComfyNode):
+    @classmethod
+    def define_schema(cls):
+        return io.Schema(
+            node_id="SeedVR2PostProcessing",
+            category="image/video",
+            inputs=[
+                io.Image.Input("decoded"),
+                io.Image.Input("original_image"),
+                io.Int.Input("upscaled_shorter_edge", min=2, force_input=True),
+                io.Combo.Input("color_correction_method", options=["lab", "wavelet", "adain", "none"], default="lab"),
+            ],
+            outputs=[io.Image.Output()],
+        )
+
+    @classmethod
+    def execute(cls, decoded, original_image, upscaled_shorter_edge, color_correction_method):
+        cls._validate_upscaled_shorter_edge(upscaled_shorter_edge)
+        decoded_5d, decoded_was_4d = cls._as_bthwc(decoded)
+        original_5d, _ = cls._as_bthwc(original_image)
+        decoded_5d = cls._restore_reference_batch_time(decoded_5d, original_5d)
+
+        b = min(decoded_5d.shape[0], original_5d.shape[0])
+        t = min(decoded_5d.shape[1], original_5d.shape[1])
+        reference_h, reference_w = cls._resized_shorter_edge_dims(
+            original_5d.shape[2], original_5d.shape[3], upscaled_shorter_edge,
+        )
+
+        decoded_5d = decoded_5d[:b, :t, :, :, :]
+        target_h = min(decoded_5d.shape[2], reference_h)
+        target_w = min(decoded_5d.shape[3], reference_w)
+        decoded_5d = decoded_5d[:, :, :target_h, :target_w, :]
+        if color_correction_method in ("lab", "wavelet", "adain"):
+            reference_5d = cls._resize_original_reference(original_image, upscaled_shorter_edge)
+            reference_5d = reference_5d[:b, :t, :, :, :]
+            reference_5d = cls._resize_reference(reference_5d, target_h, target_w)
+            output_device = decoded_5d.device
+            decoded_raw = cls._to_seedvr2_raw(decoded_5d)
+            reference_raw = cls._to_seedvr2_raw(reference_5d)
+            decoded_flat = rearrange(decoded_raw, "b t h w c -> (b t) c h w")
+            reference_flat = rearrange(reference_raw, "b t h w c -> (b t) c h w")
+            output = cls._color_transfer_chunked(
+                decoded_flat, reference_flat, output_device, color_correction_method,
+            )
+            output = rearrange(output, "(b t) c h w -> b t h w c", b=b, t=t)
+            output = output.add(1.0).div(2.0).clamp(0.0, 1.0)
+        elif color_correction_method == "none":
+            output = decoded_5d
+        else:
+            raise ValueError(f"SeedVR2PostProcessing: unknown color_correction_method {color_correction_method!r}")
+
+        h2 = output.shape[-3] - (output.shape[-3] % 2)
+        w2 = output.shape[-2] - (output.shape[-2] % 2)
+        output = output[:, :, :h2, :w2, :]
+        if decoded_was_4d:
+            output = output.reshape(-1, output.shape[-3], output.shape[-2], output.shape[-1])
+        return io.NodeOutput(output)
+
+    @staticmethod
+    def _as_bthwc(images):
+        if images.ndim == 4:
+            return images.unsqueeze(0), True
+        if images.ndim == 5:
+            return images, False
+        raise ValueError(
+            f"SeedVR2PostProcessing: expected 4-D or 5-D IMAGE tensor, got shape {tuple(images.shape)}"
+        )
+
+    @staticmethod
+    def _restore_reference_batch_time(decoded, reference):
+        if decoded.shape[0] != 1:
+            return decoded
+        ref_b, ref_t = reference.shape[:2]
+        if ref_b < 1 or decoded.shape[1] % ref_b != 0:
+            return decoded
+        decoded_t = decoded.shape[1] // ref_b
+        if decoded_t < ref_t:
+            return decoded
+        return decoded.reshape(ref_b, decoded_t, decoded.shape[2], decoded.shape[3], decoded.shape[4])
+
+    @staticmethod
+    def _to_seedvr2_raw(images):
+        return images.mul(2.0).sub(1.0)
+
+    @staticmethod
+    def _validate_upscaled_shorter_edge(upscaled_shorter_edge):
+        if not isinstance(upscaled_shorter_edge, int) or upscaled_shorter_edge < 2:
+            raise ValueError(
+                "SeedVR2PostProcessing: upscaled_shorter_edge must be an integer "
+                f"of at least 2 pixels; got {upscaled_shorter_edge!r}."
+            )
+
+    @staticmethod
+    def _resized_shorter_edge_dims(height, width, upscaled_shorter_edge):
+        if height <= width:
+            return upscaled_shorter_edge, int(upscaled_shorter_edge * width / height)
+        return int(upscaled_shorter_edge * height / width), upscaled_shorter_edge
+
+    @classmethod
+    def _resize_original_reference(cls, original, upscaled_shorter_edge):
+        original_5d, _ = cls._as_bthwc(original)
+        b, t = original_5d.shape[:2]
+        original_flat = rearrange(original_5d, "b t h w c -> (b t) c h w")
+        resized_flat = side_resize(original_flat, upscaled_shorter_edge).clamp(0.0, 1.0)
+        return rearrange(resized_flat, "(b t) c h w -> b t h w c", b=b, t=t)
+
+    @staticmethod
+    def _color_transfer_on_vae_device(decoded_flat, reference_flat, output_device, transfer_fn):
+        color_device = comfy.model_management.vae_device()
+        decoded_flat = decoded_flat.to(device=color_device)
+        reference_flat = reference_flat.to(device=color_device)
+        output = transfer_fn(decoded_flat, reference_flat)
+        return output.to(device=output_device)
+
+    @staticmethod
+    def _lab_color_transfer_on_vae_device(decoded_flat, reference_flat, output_device):
+        color_device = comfy.model_management.vae_device()
+        result = None
+        for start in range(decoded_flat.shape[0]):
+            decoded_frame = decoded_flat[start:start + 1].to(device=color_device).clone()
+            reference_frame = reference_flat[start:start + 1].to(device=color_device).clone()
+            output = lab_color_transfer(decoded_frame, reference_frame).to(device=output_device)
+            if result is None:
+                result = torch.empty(
+                    (decoded_flat.shape[0],) + tuple(output.shape[1:]),
+                    device=output_device,
+                    dtype=output.dtype,
+                )
+            result[start:start + 1].copy_(output)
+        if result is None:
+            raise ValueError("SeedVR2PostProcessing: LAB color correction requires at least one frame.")
+        return result
+
+    @classmethod
+    def _color_transfer_chunked(cls, decoded_flat, reference_flat, output_device, color_correction_method):
+        chunk_size = cls._estimate_color_correction_chunk_size(decoded_flat, color_correction_method)
+        while True:
+            next_chunk_size = None
+            try:
+                return cls._run_color_transfer_chunks(
+                    decoded_flat, reference_flat, output_device, color_correction_method, chunk_size,
+                )
+            except Exception as e:
+                comfy.model_management.raise_non_oom(e)
+                if chunk_size <= 1:
+                    raise RuntimeError(
+                        "SeedVR2PostProcessing: color correction OOM at one frame; "
+                        f"color_correction_method={color_correction_method}, shape={tuple(decoded_flat.shape)}."
+                    ) from e
+                next_chunk_size = max(1, chunk_size // 2)
+
+            comfy.model_management.soft_empty_cache()
+            chunk_size = next_chunk_size
+
+    @classmethod
+    def _run_color_transfer_chunks(cls, decoded_flat, reference_flat, output_device, color_correction_method, chunk_size):
+        result = None
+        for start in range(0, decoded_flat.shape[0], chunk_size):
+            end = min(start + chunk_size, decoded_flat.shape[0])
+            decoded_chunk = decoded_flat[start:end]
+            reference_chunk = reference_flat[start:end]
+            if color_correction_method == "lab":
+                output = cls._lab_color_transfer_on_vae_device(decoded_chunk, reference_chunk, output_device)
+            elif color_correction_method == "wavelet":
+                output = cls._color_transfer_on_vae_device(
+                    decoded_chunk, reference_chunk, output_device, wavelet_color_transfer,
+                )
+            else:
+                output = cls._color_transfer_on_vae_device(
+                    decoded_chunk, reference_chunk, output_device, adain_color_transfer,
+                )
+            if result is None:
+                result = torch.empty(
+                    (decoded_flat.shape[0],) + tuple(output.shape[1:]),
+                    device=output_device,
+                    dtype=output.dtype,
+                )
+            result[start:end].copy_(output)
+        if result is None:
+            raise ValueError("SeedVR2PostProcessing: color correction requires at least one frame.")
+        return result
+
+    @classmethod
+    def _estimate_color_correction_chunk_size(cls, decoded_flat, color_correction_method):
+        multiplier = cls._color_correction_memory_multiplier(color_correction_method)
+        frames = decoded_flat.shape[0]
+        _, channels, height, width = decoded_flat.shape
+        dtype_bytes = max(decoded_flat.element_size(), 4)
+        bytes_per_frame = height * width * channels * dtype_bytes * multiplier
+        if bytes_per_frame <= 0:
+            return frames
+        color_device = comfy.model_management.vae_device()
+        free_memory = comfy.model_management.get_free_memory(color_device)
+        chunk_size = int((free_memory * COLOR_CORRECTION_MEMORY_HEADROOM) // bytes_per_frame)
+        return max(1, min(frames, chunk_size))
+
+    @staticmethod
+    def _color_correction_memory_multiplier(color_correction_method):
+        if color_correction_method == "lab":
+            return LAB_SCALE_MULTIPLIER
+        if color_correction_method == "wavelet":
+            return WAVELET_SCALE_MULTIPLIER
+        if color_correction_method == "adain":
+            return ADAIN_SCALE_MULTIPLIER
+        raise ValueError(f"SeedVR2PostProcessing: unknown color_correction_method {color_correction_method!r}")
+
+    @staticmethod
+    def _resize_reference(reference, height, width):
+        if reference.shape[2] == height and reference.shape[3] == width:
+            return reference
+        b, t = reference.shape[:2]
+        reference_flat = rearrange(reference, "b t h w c -> (b t) c h w")
+        resized = TVF.resize(
+            reference_flat,
+            size=(height, width),
+            interpolation=InterpolationMode.BICUBIC,
+            antialias=not (isinstance(reference_flat, torch.Tensor) and reference_flat.device.type == "mps"),
+        )
+        return rearrange(resized, "(b t) c h w -> b t h w c", b=b, t=t)
+
+
+class SeedVR2Conditioning(io.ComfyNode):
+    @classmethod
+    def define_schema(cls):
+        return io.Schema(
+            node_id="SeedVR2Conditioning",
+            category="image/video",
+            inputs=[
+                io.Model.Input("model"),
+                io.Latent.Input("vae_conditioning", display_name="LATENT"),
+            ],
+            outputs=[
+                io.Model.Output(display_name = "model"),
+                io.Conditioning.Output(display_name = "positive"),
+                io.Conditioning.Output(display_name = "negative"),
+                io.Latent.Output(display_name = "latent"),
+            ],
+        )
+
+    @classmethod
+    def execute(cls, model, vae_conditioning) -> io.NodeOutput:
+
+        vae_conditioning = vae_conditioning["samples"]
+        if vae_conditioning.ndim != 5:
+            raise ValueError(
+                "SeedVR2Conditioning expects a 5-D VAE latent in Comfy "
+                f"channel-first layout; got shape {tuple(vae_conditioning.shape)}."
+            )
+        if vae_conditioning.shape[-1] == _SEEDVR2_LATENT_CHANNELS and vae_conditioning.shape[1] != _SEEDVR2_LATENT_CHANNELS:
+            raise ValueError(
+                "SeedVR2Conditioning expects SeedVR2 VAE latents in Comfy "
+                f"channel-first layout (B, {_SEEDVR2_LATENT_CHANNELS}, T, H, W); "
+                f"got channel-last shape {tuple(vae_conditioning.shape)}."
+            )
+        vae_conditioning = vae_conditioning.movedim(1, -1).contiguous()
+        model_patcher = model
+        model = _resolve_seedvr2_diffusion_model(model_patcher)
+        pos_cond = model.positive_conditioning
+        neg_cond = model.negative_conditioning
+
+        # Fail-loud guard against silently-wrong output when a numz-format
+        # DiT-only ``.safetensors`` (no ``positive_conditioning`` /
+        # ``negative_conditioning`` keys) is loaded via ``UNETLoader``.
+        # ``NaDiT.__init__`` zero-fills the buffers via ``torch.zeros`` (see
+        # ``comfy/ldm/seedvr/model.py``); ``load_state_dict(strict=False)``
+        # leaves them at zero when the keys are absent. Detect that state
+        # here rather than at ``BaseModel.extra_conds`` (per sampling step,
+        # wasteful) or at the resolver helper (mixes structural shape with
+        # semantic content). Both buffers must be checked together — partial
+        # bake regressions could populate one but not the other.
+        if (
+            pos_cond.float().abs().sum().item() == 0
+            and neg_cond.float().abs().sum().item() == 0
+        ):
+            raise RuntimeError(
+                f"{_SEEDVR2_INVALID_MODEL_MSG_PREFIX}: positive_conditioning "
+                f"and negative_conditioning buffers are zero-valued — model "
+                f"file appears to be a numz-format DiT-only export missing "
+                f"the SeedVR2 conditioning tensors. "
+                f"Re-bake the file with ``positive_conditioning`` (58, 5120) "
+                f"and ``negative_conditioning`` (64, 5120) keys at top level, "
+                f"or load via CheckpointLoaderSimple from a bundled "
+                f"checkpoint."
+            )
+
+        _apply_rope_freqs_float32_cast(model)
+
+        condition = torch.stack([get_conditions(c, c) for c in vae_conditioning])
+        condition = condition.movedim(-1, 1)
+        latent = vae_conditioning.movedim(-1, 1)
+
+        latent = rearrange(latent, "b c t h w -> b (c t) h w")
+        condition = rearrange(condition, "b c t h w -> b (c t) h w")
+
+        negative = [[neg_cond.unsqueeze(0), {"condition": condition}]]
+        positive = [[pos_cond.unsqueeze(0), {"condition": condition}]]
+
+        return io.NodeOutput(model_patcher, positive, negative, {"samples": latent})
+
+# SeedVR2 latent / conditioning channel constants. The SeedVR2 conditioning
+# stage collapses ``(B, C, T, H, W) -> (B, C*T, H, W)`` for both the latent
+# (C=16) and the per-frame condition tensor (C=17 = 16 latent + 1 mask), as
+# required by ``NaDiT.forward`` which un-collapses via
+# ``view(B, 16, -1, H, W)`` and ``view(B, 17, -1, H, W)`` respectively.
+_SEEDVR2_LATENT_CHANNELS = 16
+_SEEDVR2_CONDITION_CHANNELS = 17
+
+
+def _slice_collapsed_4d_along_t(tensor_4d: torch.Tensor, t_start: int,
+                                 t_end: int, channels: int) -> torch.Tensor:
+    """Slice a SeedVR2-style collapsed 4D tensor ``(B, channels*T, H, W)``
+    along the latent T axis, returning ``(B, channels*(t_end - t_start), H, W)``.
+
+    Reshape -> slice -> ``.contiguous()`` -> re-collapse. ``reshape`` is
+    used for the un-collapse so non-contiguous incoming tensors from
+    cropping or slicing nodes are accepted. The
+    ``.contiguous()`` is mandatory: T-axis slicing of a 5D tensor produces a
+    non-contiguous view, and the subsequent re-collapse requires contiguous
+    storage.
+    """
+    B, CT, H, W = tensor_4d.shape
+    if CT % channels != 0:
+        raise ValueError(
+            f"_slice_collapsed_4d_along_t: collapsed channel dim {CT} is not "
+            f"divisible by channels={channels}; tensor shape {tuple(tensor_4d.shape)}."
+        )
+    T = CT // channels
+    if not (0 <= t_start < t_end <= T):
+        raise ValueError(
+            f"_slice_collapsed_4d_along_t: slice [{t_start}:{t_end}] out of "
+            f"range for T={T}."
+        )
+    new_T = t_end - t_start
+    sliced = tensor_4d.reshape(B, channels, T, H, W)[:, :, t_start:t_end, :, :].contiguous()
+    return sliced.reshape(B, channels * new_T, H, W)
+
+
+def _slice_seedvr2_cond_along_t(cond_list, t_start: int, t_end: int):
+    """Build a new SeedVR2 conditioning list with the per-frame ``condition``
+    tensor sliced along the latent T axis.
+
+    SeedVR2 conditioning entries have the shape
+    ``[text_cond_tensor, options_dict]`` where ``options_dict["condition"]``
+    is a 4D collapsed ``(B, 17*T, H, W)`` tensor; the text tensor itself has
+    no temporal axis and is passed through unchanged. Other keys in the
+    options dict (controlnets, etc.) are also passed through unchanged. If
+    an entry has no ``"condition"`` key, the entry is forwarded verbatim.
+
+    A new list of ``[text_cond, new_options_dict]`` pairs is returned; the
+    original ``cond_list`` and its options dicts are not mutated.
+    """
+    new_list = []
+    for entry in cond_list:
+        text_cond, options = entry[0], entry[1]
+        if "condition" not in options:
+            new_list.append(entry)
+            continue
+        new_options = options.copy()
+        new_options["condition"] = _slice_collapsed_4d_along_t(
+            new_options["condition"], t_start, t_end,
+            _SEEDVR2_CONDITION_CHANNELS,
+        )
+        new_list.append([text_cond, new_options])
+    return new_list
+
+
+def _slice_seedvr2_noise_mask_along_t(noise_mask: torch.Tensor,
+                                      samples_4d: torch.Tensor,
+                                      t_start: int,
+                                      t_end: int):
+    """Slice collapsed SeedVR2 masks and preserve standard masks.
+
+    ``SetLatentNoiseMask`` produces ``(B, 1, H, W)`` masks that KSampler
+    expands to the latent shape. Only masks already expanded to the full
+    collapsed ``(B, 16*T, H, W)`` shape need temporal slicing here.
+    """
+    if noise_mask.ndim == samples_4d.ndim and noise_mask.shape[1] == samples_4d.shape[1]:
+        return _slice_collapsed_4d_along_t(
+            noise_mask, t_start, t_end, _SEEDVR2_LATENT_CHANNELS,
+        )
+    return noise_mask
+
+
+def _concat_chunks_along_t(chunks_4d, channels: int) -> torch.Tensor:
+    """Concatenate a list of SeedVR2-style collapsed 4D tensors
+    ``(B, channels*T_i, H, W)`` along the latent T axis. Each chunk is
+    un-collapsed to 5D, concatenated on ``dim=2``, then re-collapsed to 4D.
+    """
+    if len(chunks_4d) == 0:
+        raise ValueError("_concat_chunks_along_t: empty chunk list.")
+    fives = []
+    for ch in chunks_4d:
+        B, CT, H, W = ch.shape
+        if CT % channels != 0:
+            raise ValueError(
+                f"_concat_chunks_along_t: chunk shape {tuple(ch.shape)} "
+                f"channel dim {CT} not divisible by channels={channels}."
+            )
+        T = CT // channels
+        fives.append(ch.reshape(B, channels, T, H, W))
+    cat = torch.cat(fives, dim=2).contiguous()
+    B, C, T_total, H, W = cat.shape
+    return cat.reshape(B, C * T_total, H, W)
+
+
+def _hann_blend_weights_1d(overlap: int, device, dtype) -> torch.Tensor:
+    """Build a 1D crossfade weight tensor of length ``overlap`` for the
+    *previous* chunk's contribution; the current chunk's weight is
+    ``1 - w_prev``.
+
+    Mirrors the numz ``blend_overlapping_frames`` shape
+    (AInVFX/numz fork ``src/core/generation_utils.py``,
+    ``blend_overlapping_frames``): a Hann window with a ``[1/3, 2/3]``
+    dead-band when ``overlap >= 3``, and a plain linear ramp when
+    ``overlap < 3`` (the dead-band would collapse the transition for
+    very small overlap counts). The numz reference operates on
+    pixel-space tensors ``[overlap, H, W, C]``; this 1D form is
+    reshaped by the caller to broadcast across the latent's
+    ``(B, C, T_overlap, H, W)`` axes.
+    """
+    if overlap < 1:
+        raise ValueError(
+            f"_hann_blend_weights_1d: overlap must be >= 1; got {overlap}."
+        )
+    if overlap >= 3:
+        t = torch.linspace(0.0, 1.0, steps=overlap, device=device, dtype=dtype)
+        blend_start = 1.0 / 3.0
+        blend_end = 2.0 / 3.0
+        u = ((t - blend_start) / (blend_end - blend_start)).clamp(0.0, 1.0)
+        return 0.5 + 0.5 * torch.cos(torch.pi * u)
+    return torch.linspace(1.0, 0.0, steps=overlap, device=device, dtype=dtype)
+
+
+def _blend_overlap_region(prev_tail_5d: torch.Tensor,
+                          cur_head_5d: torch.Tensor) -> torch.Tensor:
+    """Blend two 5D ``(B, C, T_overlap, H, W)`` tensors of equal shape
+    using a 1D Hann/linear ramp along the T axis. ``prev_tail_5d``
+    receives the descending weight; ``cur_head_5d`` receives
+    ``1 - w_prev``.
+
+    The caller is responsible for ensuring both inputs have identical
+    shape and dtype/device.
+    """
+    if prev_tail_5d.shape != cur_head_5d.shape:
+        raise ValueError(
+            f"_blend_overlap_region: shape mismatch "
+            f"prev {tuple(prev_tail_5d.shape)} vs "
+            f"cur {tuple(cur_head_5d.shape)}."
+        )
+    overlap = int(prev_tail_5d.shape[2])
+    w_prev_1d = _hann_blend_weights_1d(
+        overlap, prev_tail_5d.device, prev_tail_5d.dtype,
+    )
+    # Reshape to (1, 1, overlap, 1, 1) for broadcast across B, C, H, W.
+    w_prev = w_prev_1d.view(1, 1, overlap, 1, 1)
+    w_cur = 1.0 - w_prev
+    return prev_tail_5d * w_prev + cur_head_5d * w_cur
+
+
+def _concat_chunks_with_overlap_blend(chunk_specs, channels: int,
+                                      overlap_latent: int) -> torch.Tensor:
+    """Concatenate temporally-overlapping chunks back into a single
+    collapsed 4D tensor, blending overlap regions with a Hann/linear
+    crossfade.
+
+    ``chunk_specs`` is a list of ``(t_start, t_end, chunk_4d)`` tuples
+    in source-latent T coordinates. ``overlap_latent == 0`` is a fast
+    path that delegates to plain concatenation (and produces output
+    bit-identical to ``_concat_chunks_along_t`` of the same chunks).
+
+    The blend at each pair of adjacent chunks acts on the actual
+    overlap region width ``min(prev_end - cur_start, current chunk
+    length)``, which may be smaller than ``overlap_latent`` when the
+    final chunk is a runt shorter than the configured overlap.
+    """
+    if len(chunk_specs) == 0:
+        raise ValueError("_concat_chunks_with_overlap_blend: empty chunk list.")
+    if overlap_latent < 0:
+        raise ValueError(
+            f"_concat_chunks_with_overlap_blend: overlap_latent must be "
+            f">= 0; got {overlap_latent}."
+        )
+
+    # Validate channel divisibility once and capture per-chunk T.
+    chunk_5d = []
+    for t_start, t_end, ch in chunk_specs:
+        B, CT, H, W = ch.shape
+        if CT % channels != 0:
+            raise ValueError(
+                f"_concat_chunks_with_overlap_blend: chunk shape "
+                f"{tuple(ch.shape)} channel dim {CT} not divisible "
+                f"by channels={channels}."
+            )
+        T = CT // channels
+        if t_end - t_start != T:
+            raise ValueError(
+                f"_concat_chunks_with_overlap_blend: chunk T={T} mismatches "
+                f"declared range [{t_start}:{t_end}]."
+            )
+        chunk_5d.append((t_start, t_end, ch.reshape(B, channels, T, H, W)))
+
+    if overlap_latent == 0:
+        # Fast path: pure concat in the caller-provided chunk order.
+        return _concat_chunks_along_t(
+            [c.reshape(c.shape[0], channels * c.shape[2], c.shape[3], c.shape[4])
+             for _, _, c in chunk_5d],
+            channels,
+        )
+
+    T_total = max(t_end for _, t_end, _ in chunk_5d)
+    first_5d = chunk_5d[0][2]
+    B = first_5d.shape[0]
+    H = first_5d.shape[3]
+    W = first_5d.shape[4]
+    result = torch.empty(
+        (B, channels, T_total, H, W),
+        device=first_5d.device, dtype=first_5d.dtype,
+    )
+    filled_until = 0
+    for i, (cs, ce, ct_5d) in enumerate(chunk_5d):
+        chunk_T = int(ct_5d.shape[2])
+        if i == 0:
+            result[:, :, cs:ce, :, :] = ct_5d
+            filled_until = ce
+            continue
+        # Overlap region width is bounded by both the previous fill
+        # frontier and the current chunk's actual length (for runt
+        # final chunks shorter than the configured overlap).
+        overlap_len = min(filled_until - cs, chunk_T)
+        if overlap_len > 0:
+            prev_tail = result[:, :, cs:cs + overlap_len, :, :].contiguous()
+            cur_head = ct_5d[:, :, :overlap_len, :, :].contiguous()
+            blended = _blend_overlap_region(prev_tail, cur_head)
+            result[:, :, cs:cs + overlap_len, :, :] = blended
+            tail_start = cs + overlap_len
+            tail_end = ce
+            if tail_end > tail_start:
+                result[:, :, tail_start:tail_end, :, :] = (
+                    ct_5d[:, :, overlap_len:, :, :]
+                )
+        else:
+            # Disjoint chunks (overlap_latent set but this pair did not
+            # actually overlap, e.g. step_latent equal to chunk_latent
+            # in a degenerate config). Treat as concat.
+            result[:, :, cs:ce, :, :] = ct_5d
+        filled_until = ce
+
+    return result.contiguous().reshape(B, channels * T_total, H, W)
+
+
+def _run_standard_sample(model, seed: int, steps: int, cfg: float,
+                         sampler_name: str, scheduler: str,
+                         positive, negative, latent_image: dict,
+                         denoise: float) -> dict:
+    """Single-shot delegation that mirrors the standard ``common_ksampler``
+    flow (``nodes.py:common_ksampler``): generate noise from seed, run
+    ``comfy.sample.sample``, return a latent dict. Used by the
+    ProgressiveSampler short-circuit when the full sequence fits in one
+    chunk so chunking introduces no overhead for small videos.
+    """
+    samples_in = latent_image["samples"]
+    samples_in = comfy.sample.fix_empty_latent_channels(
+        model, samples_in, latent_image.get("downscale_ratio_spacial", None),
+    )
+    batch_inds = latent_image.get("batch_index", None)
+    noise = comfy.sample.prepare_noise(samples_in, seed, batch_inds)
+    noise_mask = latent_image.get("noise_mask", None)
+    samples = comfy.sample.sample(
+        model, noise, steps, cfg, sampler_name, scheduler,
+        positive, negative, samples_in,
+        denoise=denoise, noise_mask=noise_mask, seed=seed,
+    )
+    out = latent_image.copy()
+    out.pop("downscale_ratio_spacial", None)
+    out["samples"] = samples
+    return out
+
+
+class SeedVR2ProgressiveSampler(io.ComfyNode):
+    """Sequential temporal chunking sampler for SeedVR2 native.
+
+    Drop-in replacement for ``KSampler`` in SeedVR2 native workflows that
+    OOM on long sequences. The latent enters the sampler in SeedVR2's
+    collapsed form ``(B, 16*T, H, W)`` (collapsed by ``SeedVR2Conditioning``
+    at ``rearrange(b c t h w -> b (c t) h w)``); this node slices that
+    tensor along the temporal axis, runs the configured inner sampler
+    sequentially per chunk against the standard ``comfy.sample.sample``
+    entry point, and concatenates per-chunk outputs back into a single
+    ``(B, 16*T_total, H, W)`` latent.
+
+    ``frames_per_chunk`` is expressed in pixel-frame units to match the
+    SeedVR2 4n+1 constraint enforced upstream by ``cut_videos`` and the
+    VAE's ``temporal_downsample_factor=4``. A pixel chunk size ``F``
+    maps to ``(F - 1) // 4 + 1`` latent-frame chunks.
+
+    Determinism contract: a single noise tensor is generated once from
+    the user seed and sliced per chunk (rather than re-seeding each
+    chunk), so a workflow that fits in a single chunk produces output
+    identical to a workflow that fits in N chunks at the same seed,
+    modulo the inherent T-axis chunk-boundary independence of the model.
+    """
+
+    @classmethod
+    def define_schema(cls):
+        return io.Schema(
+            node_id="SeedVR2ProgressiveSampler",
+            category="sampling",
+            inputs=[
+                io.Model.Input("model"),
+                io.Int.Input("seed", default=0, min=0,
+                             max=0xffffffffffffffff,
+                             control_after_generate=True),
+                io.Int.Input("steps", default=20, min=1, max=10000),
+                io.Float.Input("cfg", default=1.0, min=0.0, max=100.0,
+                               step=0.1, round=0.01),
+                io.Combo.Input("sampler_name",
+                               options=comfy.samplers.SAMPLER_NAMES),
+                io.Combo.Input("scheduler",
+                               options=comfy.samplers.SCHEDULER_NAMES),
+                io.Conditioning.Input("positive"),
+                io.Conditioning.Input("negative"),
+                io.Latent.Input("latent_image"),
+                io.Float.Input("denoise", default=1.0, min=0.0, max=1.0,
+                               step=0.01),
+                io.Int.Input("frames_per_chunk", default=21, min=1,
+                             max=16384, step=4),
+                io.Int.Input("temporal_overlap", default=0, min=0,
+                             max=16384,
+                             tooltip="Latent-frame overlap between "
+                                     "adjacent chunks; blended with a "
+                                     "Hann window (linear for overlap "
+                                     "< 3). 0 = no blend, pure concat. "
+                                     "Values >= the chunk's latent-frame "
+                                     "length use the maximum valid "
+                                     "overlap; 1 latent frame corresponds "
+                                     "to ~4 pixel frames."),
+                io.Combo.Input("chunking_mode",
+                               options=["manual", "auto"],
+                               default="manual",
+                               tooltip="manual = use frames_per_chunk "
+                                       "exactly; auto = retry only real OOM "
+                                       "failures with progressively smaller "
+                                       "temporal chunks."),
+            ],
+            outputs=[io.Latent.Output()],
+        )
+
+    @classmethod
+    def execute(cls, model, seed, steps, cfg, sampler_name, scheduler,
+                positive, negative, latent_image, denoise,
+                frames_per_chunk, temporal_overlap,
+                chunking_mode="manual") -> io.NodeOutput:
+        # 4n+1 validation in pixel-frame domain. The SeedVR2 native pipeline
+        # requires pixel-frame counts of the form 4n+1 (1, 5, 9, 13, ...),
+        # imposed at ``cut_videos`` upstream and propagated through the VAE's
+        # temporal_downsample_factor=4. Reject violations explicitly before
+        # any model invocation; a silent rounding would mis-align chunk
+        # boundaries with the 4n+1 lattice.
+        if frames_per_chunk < 1 or (frames_per_chunk - 1) % 4 != 0:
+            raise ValueError(
+                f"SeedVR2ProgressiveSampler: frames_per_chunk must be a "
+                f"4n+1 pixel-frame count (1, 5, 9, 13, 17, 21, ...); "
+                f"got {frames_per_chunk}."
+            )
+
+        samples_4d = latent_image["samples"]
+        samples_4d = comfy.sample.fix_empty_latent_channels(
+            model, samples_4d,
+            latent_image.get("downscale_ratio_spacial", None),
+        )
+        if samples_4d.ndim != 4:
+            raise ValueError(
+                f"SeedVR2ProgressiveSampler: expected 4D collapsed latent "
+                f"(B, 16*T, H, W); got shape {tuple(samples_4d.shape)}."
+            )
+        B, CT, H, W = samples_4d.shape
+        if CT % _SEEDVR2_LATENT_CHANNELS != 0:
+            raise ValueError(
+                f"SeedVR2ProgressiveSampler: collapsed channel dim {CT} is "
+                f"not divisible by SeedVR2 latent channels "
+                f"{_SEEDVR2_LATENT_CHANNELS}; latent does not appear to be "
+                f"SeedVR2-shaped."
+            )
+        T_latent = CT // _SEEDVR2_LATENT_CHANNELS
+        T_pixel = 4 * (T_latent - 1) + 1
+
+        if chunking_mode not in ("manual", "auto"):
+            raise ValueError(
+                f"SeedVR2ProgressiveSampler: chunking_mode must be "
+                f"'manual' or 'auto'; got {chunking_mode!r}."
+            )
+
+        if chunking_mode == "auto":
+            attempts = _seedvr2_auto_chunk_attempts(
+                T_latent, T_pixel, frames_per_chunk,
+            )
+            for i, attempt_frames_per_chunk in enumerate(attempts):
+                retry = False
+                try:
+                    return cls.execute(
+                        model=model, seed=seed, steps=steps, cfg=cfg,
+                        sampler_name=sampler_name, scheduler=scheduler,
+                        positive=positive, negative=negative,
+                        latent_image=latent_image, denoise=denoise,
+                        frames_per_chunk=attempt_frames_per_chunk,
+                        temporal_overlap=temporal_overlap,
+                        chunking_mode="manual",
+                    )
+                except Exception as e:
+                    comfy.model_management.raise_non_oom(e)
+                    if i == len(attempts) - 1:
+                        raise RuntimeError(
+                            "SeedVR2ProgressiveSampler: exhausted auto "
+                            "chunking attempts after OOM. Tried "
+                            f"frames_per_chunk values {attempts}."
+                        ) from e
+                    retry = True
+
+                if retry:
+                    logging.warning(
+                        "SeedVR2ProgressiveSampler auto chunking OOM at "
+                        "frames_per_chunk=%s; retrying with "
+                        "frames_per_chunk=%s.",
+                        attempt_frames_per_chunk, attempts[i + 1],
+                    )
+                    comfy.model_management.soft_empty_cache()
+
+        # Short-circuit: total fits in one chunk -> standard path with no
+        # chunking overhead. Output of this branch is byte-identical to the
+        # built-in KSampler given the same (model, seed, steps, cfg,
+        # sampler_name, scheduler, positive, negative, latent_image,
+        # denoise) tuple.
+        if T_pixel <= frames_per_chunk:
+            return io.NodeOutput(_run_standard_sample(
+                model, seed, steps, cfg, sampler_name, scheduler,
+                positive, negative, latent_image, denoise,
+            ))
+
+        # Map pixel chunk -> latent chunk. Each chunk's latent length is
+        # at most ``chunk_latent``; the final chunk may be a runt that
+        # is automatically 4n+1-aligned in the pixel domain by the
+        # T_pixel = 4*(T_latent-1) + 1 mapping (every positive integer
+        # T_latent corresponds to a valid 4n+1 pixel count).
+        chunk_latent = (frames_per_chunk - 1) // 4 + 1
+
+        # ``temporal_overlap`` is exposed in latent-frame units, but users
+        # do not know the derived latent chunk length. Treat oversized
+        # values as "maximum valid overlap" while preserving a strictly
+        # positive chunk-loop stride.
+        if temporal_overlap < 0:
+            raise ValueError(
+                f"SeedVR2ProgressiveSampler: temporal_overlap must be >= 0; "
+                f"got {temporal_overlap}."
+            )
+        temporal_overlap = min(temporal_overlap, chunk_latent - 1)
+        step_latent = chunk_latent - temporal_overlap
+
+        # Generate full noise once from the user seed, then slice along T
+        # per chunk. Using one global noise tensor (rather than re-seeding
+        # per chunk) preserves seed-determinism across chunk-count
+        # variations: the same (seed, total T_latent) always produces the
+        # same noise samples regardless of how the work is partitioned.
+        batch_inds = latent_image.get("batch_index", None)
+        noise_full = comfy.sample.prepare_noise(samples_4d, seed, batch_inds)
+
+        noise_mask = latent_image.get("noise_mask", None)
+
+        # Build the flat list of chunk ranges first so the chunking
+        # geometry is fully known before any sample call.
+        chunk_ranges = []
+        for chunk_start in range(0, T_latent, step_latent):
+            chunk_end = min(chunk_start + chunk_latent, T_latent)
+            if chunk_start >= chunk_end:
+                # The final iteration of a stride that lands exactly on
+                # T_latent produces a zero-length chunk; skip it.
+                break
+            chunk_ranges.append((chunk_start, chunk_end))
+            if chunk_end >= T_latent:
+                break
+
+        def _sample_one_chunk(chunk_start, chunk_end):
+            samples_chunk = _slice_collapsed_4d_along_t(
+                samples_4d, chunk_start, chunk_end,
+                _SEEDVR2_LATENT_CHANNELS,
+            )
+            noise_chunk = _slice_collapsed_4d_along_t(
+                noise_full, chunk_start, chunk_end,
+                _SEEDVR2_LATENT_CHANNELS,
+            )
+            positive_chunk = _slice_seedvr2_cond_along_t(
+                positive, chunk_start, chunk_end,
+            )
+            negative_chunk = _slice_seedvr2_cond_along_t(
+                negative, chunk_start, chunk_end,
+            )
+
+            # Per-chunk noise_mask handling: standard masks are passed
+            # through for KSampler expansion; pre-expanded collapsed
+            # masks are sliced.
+            chunk_noise_mask = None
+            if noise_mask is not None:
+                chunk_noise_mask = _slice_seedvr2_noise_mask_along_t(
+                    noise_mask, samples_4d, chunk_start, chunk_end,
+                )
+
+            return comfy.sample.sample(
+                model, noise_chunk, steps, cfg, sampler_name, scheduler,
+                positive_chunk, negative_chunk, samples_chunk,
+                denoise=denoise, noise_mask=chunk_noise_mask, seed=seed,
+            )
+
+        chunk_specs = []
+        for chunk_start, chunk_end in chunk_ranges:
+            chunk_samples = _sample_one_chunk(chunk_start, chunk_end)
+            chunk_specs.append((chunk_start, chunk_end, chunk_samples))
+
+        final = _concat_chunks_with_overlap_blend(
+            chunk_specs, _SEEDVR2_LATENT_CHANNELS, temporal_overlap,
+        )
+
+        out = latent_image.copy()
+        out.pop("downscale_ratio_spacial", None)
+        out["samples"] = final
+        return io.NodeOutput(out)
+
+
+class SeedVRExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            SeedVR2Conditioning,
+            SeedVR2Resize,
+            SeedVR2ResizeAdvanced,
+            SeedVR2PostProcessing,
+            SeedVR2ProgressiveSampler,
+        ]
+
+async def comfy_entrypoint() -> SeedVRExtension:
+    return SeedVRExtension()
diff --git a/nodes.py b/nodes.py
index 669a7057b..a3d5af27f 100644
--- a/nodes.py
+++ b/nodes.py
@@ -47,14 +47,18 @@ import node_helpers
 if args.enable_manager:
     import comfyui_manager
 
+
 def before_node_execution():
     comfy.model_management.throw_exception_if_processing_interrupted()
 
+
 def interrupt_processing(value=True):
     comfy.model_management.interrupt_current_processing(value)
 
+
 MAX_RESOLUTION=16384
 
+
 class CLIPTextEncode(ComfyNodeABC):
     @classmethod
     def INPUT_TYPES(s) -> InputTypeDict:
@@ -323,8 +327,8 @@ class VAEDecodeTiled:
         return {"required": {"samples": ("LATENT", ), "vae": ("VAE", ),
                              "tile_size": ("INT", {"default": 512, "min": 64, "max": 4096, "step": 32, "advanced": True}),
                              "overlap": ("INT", {"default": 64, "min": 0, "max": 4096, "step": 32, "advanced": True}),
-                             "temporal_size": ("INT", {"default": 64, "min": 8, "max": 4096, "step": 4, "tooltip": "Only used for video VAEs: Amount of frames to decode at a time.", "advanced": True}),
-                             "temporal_overlap": ("INT", {"default": 8, "min": 4, "max": 4096, "step": 4, "tooltip": "Only used for video VAEs: Amount of frames to overlap.", "advanced": True}),
+                             "temporal_size": ("INT", {"default": 64, "min": 0, "max": 4096, "step": 4, "tooltip": "Only used for video VAEs: Amount of frames to decode at a time. SeedVR2 allows 0 to disable temporal slicing.", "advanced": True}),
+                             "temporal_overlap": ("INT", {"default": 8, "min": 0, "max": 4096, "step": 4, "tooltip": "Only used for video VAEs: Amount of frames to overlap.", "advanced": True}),
                             }}
     RETURN_TYPES = ("IMAGE",)
     FUNCTION = "decode"
@@ -334,18 +338,32 @@ class VAEDecodeTiled:
     def decode(self, vae, samples, tile_size, overlap=64, temporal_size=64, temporal_overlap=8):
         if tile_size < overlap * 4:
             overlap = tile_size // 4
-        if temporal_size < temporal_overlap * 2:
-            temporal_overlap = temporal_overlap // 2
         temporal_compression = vae.temporal_compression_decode()
         if temporal_compression is not None:
-            temporal_size = max(2, temporal_size // temporal_compression)
-            temporal_overlap = max(1, min(temporal_size // 2, temporal_overlap // temporal_compression))
+            if temporal_size <= 0:
+                temporal_size = 0
+                temporal_overlap = 0
+            else:
+                requested_temporal_overlap = temporal_overlap
+                if temporal_size < temporal_overlap * 2:
+                    temporal_overlap = temporal_overlap // 2
+                temporal_size = max(2, temporal_size // temporal_compression)
+                temporal_overlap = min(temporal_size // 2, temporal_overlap // temporal_compression)
+                if requested_temporal_overlap > 0:
+                    temporal_overlap = max(1, temporal_overlap)
         else:
             temporal_size = None
             temporal_overlap = None
 
         compression = vae.spacial_compression_decode()
-        images = vae.decode_tiled(samples["samples"], tile_x=tile_size // compression, tile_y=tile_size // compression, overlap=overlap // compression, tile_t=temporal_size, overlap_t=temporal_overlap)
+        images = vae.decode_tiled(
+            samples["samples"],
+            tile_x=tile_size // compression,
+            tile_y=tile_size // compression,
+            overlap=overlap // compression,
+            tile_t=temporal_size,
+            overlap_t=temporal_overlap,
+        )
         if len(images.shape) == 5: #Combine batches
             images = images.reshape(-1, images.shape[-3], images.shape[-2], images.shape[-1])
         return (images, )
@@ -362,7 +380,7 @@ class VAEEncode:
 
     def encode(self, vae, pixels):
         t = vae.encode(pixels)
-        return ({"samples":t}, )
+        return ({"samples": t}, )
 
 class VAEEncodeTiled:
     @classmethod
@@ -370,8 +388,8 @@ class VAEEncodeTiled:
         return {"required": {"pixels": ("IMAGE", ), "vae": ("VAE", ),
                              "tile_size": ("INT", {"default": 512, "min": 64, "max": 4096, "step": 64, "advanced": True}),
                              "overlap": ("INT", {"default": 64, "min": 0, "max": 4096, "step": 32, "advanced": True}),
-                             "temporal_size": ("INT", {"default": 64, "min": 8, "max": 4096, "step": 4, "tooltip": "Only used for video VAEs: Amount of frames to encode at a time.", "advanced": True}),
-                             "temporal_overlap": ("INT", {"default": 8, "min": 4, "max": 4096, "step": 4, "tooltip": "Only used for video VAEs: Amount of frames to overlap.", "advanced": True}),
+                             "temporal_size": ("INT", {"default": 64, "min": 0, "max": 4096, "step": 4, "tooltip": "Only used for video VAEs: Amount of frames to encode at a time. SeedVR2 allows 0 to disable temporal slicing.", "advanced": True}),
+                             "temporal_overlap": ("INT", {"default": 8, "min": 0, "max": 4096, "step": 4, "tooltip": "Only used for video VAEs: Amount of frames to overlap.", "advanced": True}),
                             }}
     RETURN_TYPES = ("LATENT",)
     FUNCTION = "encode"
@@ -379,6 +397,9 @@ class VAEEncodeTiled:
     CATEGORY = "experimental"
 
     def encode(self, vae, pixels, tile_size, overlap, temporal_size=64, temporal_overlap=8):
+        if temporal_size <= 0:
+            temporal_size = 0
+            temporal_overlap = 0
         t = vae.encode_tiled(pixels, tile_x=tile_size, tile_y=tile_size, overlap=overlap, tile_t=temporal_size, overlap_t=temporal_overlap)
         return ({"samples": t}, )
 
@@ -2417,6 +2438,7 @@ async def init_builtin_extra_nodes():
         "nodes_camera_trajectory.py",
         "nodes_edit_model.py",
         "nodes_tcfg.py",
+        "nodes_seedvr.py",
         "nodes_context_windows.py",
         "nodes_qwen.py",
         "nodes_chroma_radiance.py",