Merge 1226e301ad into 3e3ed8cc2a

Add script in AMD portable to launch with dynamic vram. (#13667)
List all the portable downloads in the README section. (#13666)
2026-05-23 23:47:25 +08:00 · 2026-05-02 09:08:15 -07:00 · 2026-05-01 20:19:46 -04:00 · 2026-05-01 20:19:32 -04:00 · 2026-05-02 06:37:18 +08:00 · 2026-05-01 14:17:25 -07:00
21 changed files with 586 additions and 141 deletions
--- a/.ci/windows_amd_base_files/run_amd_gpu_disable_smart_memory.bat
+++ b/.ci/windows_amd_base_files/run_amd_gpu_disable_smart_memory.bat
@ -1,2 +1,2 @@
-.\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build --disable-smart-memory
+.\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build --enable-dynamic-vram
 pause
--- a/2
+++ b/2
@ -1,2 +1,2 @@
 # Admins
-* @comfyanonymous @kosinkadink @guill @alexisrolland @rattus128
+* @comfyanonymous @kosinkadink @guill @alexisrolland @rattus128 @kijai
--- a/README.md
+++ b/README.md
@ -193,13 +193,15 @@ If you have trouble extracting it, right click the file -> properties -> unblock

 The portable above currently comes with python 3.13 and pytorch cuda 13.0. Update your Nvidia drivers if it doesn't start.

-#### Alternative Downloads:
+#### All Official Portable Downloads:

 [Portable for AMD GPUs](https://github.com/comfyanonymous/ComfyUI/releases/latest/download/ComfyUI_windows_portable_amd.7z)

-[Experimental portable for Intel GPUs](https://github.com/comfyanonymous/ComfyUI/releases/latest/download/ComfyUI_windows_portable_intel.7z)
+[Portable for Intel GPUs](https://github.com/comfyanonymous/ComfyUI/releases/latest/download/ComfyUI_windows_portable_intel.7z)

-[Portable with pytorch cuda 12.6 and python 3.12](https://github.com/comfyanonymous/ComfyUI/releases/latest/download/ComfyUI_windows_portable_nvidia_cu126.7z) (Supports Nvidia 10 series and older GPUs).
+[Portable for Nvidia GPUs](https://github.com/comfyanonymous/ComfyUI/releases/latest/download/ComfyUI_windows_portable_nvidia.7z) (Supports Nvidia 20 series and newer GPUs).
+
+[Portable for Nvidia GPUs with pytorch cuda 12.6 and python 3.12](https://github.com/comfyanonymous/ComfyUI/releases/latest/download/ComfyUI_windows_portable_nvidia_cu126.7z) (Supports Nvidia 10 series and older GPUs).

 #### How do I share models between another UI and ComfyUI?

--- a/comfy/cli_args.py
+++ b/comfy/cli_args.py
@ -90,7 +90,6 @@ parser.add_argument("--force-channels-last", action="store_true", help="Force ch
 parser.add_argument("--directml", type=int, nargs="?", metavar="DIRECTML_DEVICE", const=-1, help="Use torch-directml.")

 parser.add_argument("--oneapi-device-selector", type=str, default=None, metavar="SELECTOR_STRING", help="Sets the oneAPI device(s) this instance will use.")
-parser.add_argument("--disable-ipex-optimize", action="store_true", help="Disables ipex.optimize default when loading models with Intel's Extension for Pytorch.")
 parser.add_argument("--supports-fp8-compute", action="store_true", help="ComfyUI will act like if the device supports fp8 compute.")

 class LatentPreviewMethod(enum.Enum):
--- a/comfy/context_windows.py
+++ b/comfy/context_windows.py
@ -8,6 +8,8 @@ from abc import ABC, abstractmethod
 import logging
 import comfy.model_management
 import comfy.patcher_extension
+import comfy.utils
+import comfy.conds
 if TYPE_CHECKING:
    from comfy.model_base import BaseModel
    from comfy.model_patcher import ModelPatcher
@ -51,12 +53,18 @@ class ContextHandlerABC(ABC):


 class IndexListContextWindow(ContextWindowABC):
-    def __init__(self, index_list: list[int], dim: int=0, total_frames: int=0):
+    def __init__(self, index_list: list[int], dim: int=0, total_frames: int=0, modality_windows: dict=None, context_overlap: int=0):
        self.index_list = index_list
        self.context_length = len(index_list)
+        self.context_overlap = context_overlap
        self.dim = dim
        self.total_frames = total_frames
        self.center_ratio = (min(index_list) + max(index_list)) / (2 * total_frames)
+        self.modality_windows = modality_windows  # dict of {mod_idx: IndexListContextWindow}
+        self.guide_frames_indices: list[int] = []
+        self.guide_overlap_info: list[tuple[int, int]] = []
+        self.guide_kf_local_positions: list[int] = []
+        self.guide_downscale_factors: list[int] = []

    def get_tensor(self, full: torch.Tensor, device=None, dim=None, retain_index_list=[]) -> torch.Tensor:
        if dim is None:
@ -81,6 +89,11 @@ class IndexListContextWindow(ContextWindowABC):
        region_idx = int(self.center_ratio * num_regions)
        return min(max(region_idx, 0), num_regions - 1)

+    def get_window_for_modality(self, modality_idx: int) -> 'IndexListContextWindow':
+        if modality_idx == 0:
+            return self
+        return self.modality_windows[modality_idx]
+

 class IndexListCallbacks:
    EVALUATE_CONTEXT_WINDOWS = "evaluate_context_windows"
@ -137,6 +150,157 @@ def slice_cond(cond_value, window: IndexListContextWindow, x_in: torch.Tensor, d
    return cond_value._copy_with(sliced)


+def compute_guide_overlap(guide_entries: list[dict], window_index_list: list[int]):
+    """Compute which concatenated guide frames overlap with a context window.
+
+    Args:
+        guide_entries: list of guide_attention_entry dicts
+        window_index_list: the window's frame indices into the video portion
+
+    Returns:
+        suffix_indices: indices into the guide_frames tensor for frame selection
+        overlap_info: list of (entry_idx, overlap_count) for guide_attention_entries adjustment
+        kf_local_positions: window-local frame positions for keyframe_idxs regeneration
+        total_overlap: total number of overlapping guide frames
+    """
+    window_set = set(window_index_list)
+    window_list = list(window_index_list)
+    suffix_indices = []
+    overlap_info = []
+    kf_local_positions = []
+    suffix_base = 0
+
+    for entry_idx, entry in enumerate(guide_entries):
+        latent_start = entry.get("latent_start", None)
+        if latent_start is None:
+            raise ValueError("guide_attention_entry missing required 'latent_start'.")
+        guide_len = entry["latent_shape"][0]
+        entry_overlap = 0
+
+        for local_offset in range(guide_len):
+            video_pos = latent_start + local_offset
+            if video_pos in window_set:
+                suffix_indices.append(suffix_base + local_offset)
+                kf_local_positions.append(window_list.index(video_pos))
+                entry_overlap += 1
+
+        if entry_overlap > 0:
+            overlap_info.append((entry_idx, entry_overlap))
+        suffix_base += guide_len
+
+    return suffix_indices, overlap_info, kf_local_positions, len(suffix_indices)
+
+
+@dataclass
+class WindowingState:
+    """Per-modality context windowing state for each step,
+    built using IndexListContextHandler._build_window_state().
+    For non-multimodal models the lists are length 1
+    """
+    latents: list[torch.Tensor]                  # per-modality working latents (guide frames stripped)
+    guide_latents: list[torch.Tensor | None]     # per-modality guide frames stripped from latents
+    guide_entries: list[list[dict] | None]       # per-modality guide_attention_entry metadata
+    latent_shapes: list | None                   # original packed shapes for unpack/pack (None if not multimodal)
+    dim: int = 0                                 # primary modality temporal dim for context windowing
+    is_multimodal: bool = False
+
+    def prepare_window(self, window: IndexListContextWindow, model) -> IndexListContextWindow:
+        """Reformat window for multimodal contexts by deriving per-modality index lists.
+        Non-multimodal contexts return the input window unchanged.
+        """
+        if not self.is_multimodal:
+            return window
+
+        x = self.latents[0]
+        primary_total = self.latent_shapes[0][self.dim]
+        primary_overlap = window.context_overlap
+        map_shapes = self.latent_shapes
+        if x.size(self.dim) != primary_total:
+            map_shapes = list(self.latent_shapes)
+            video_shape = list(self.latent_shapes[0])
+            video_shape[self.dim] = x.size(self.dim)
+            map_shapes[0] = torch.Size(video_shape)
+        try:
+            per_modality_indices = model.map_context_window_to_modalities(
+                window.index_list, map_shapes, self.dim)
+        except AttributeError:
+            raise NotImplementedError(
+                f"{type(model).__name__} must implement map_context_window_to_modalities for multimodal context windows.")
+        modality_windows = {}
+        for mod_idx in range(1, len(self.latents)):
+            modality_total_frames = self.latents[mod_idx].shape[self.dim]
+            ratio = modality_total_frames / primary_total if primary_total > 0 else 1
+            modality_overlap = max(round(primary_overlap * ratio), 0)
+            modality_windows[mod_idx] = IndexListContextWindow(
+                per_modality_indices[mod_idx], dim=self.dim,
+                total_frames=modality_total_frames,
+                context_overlap=modality_overlap)
+        return IndexListContextWindow(
+            window.index_list, dim=self.dim, total_frames=x.shape[self.dim],
+            modality_windows=modality_windows, context_overlap=primary_overlap)
+
+    def slice_for_window(self, window: IndexListContextWindow, retain_index_list: list[int], device=None) -> tuple[list[torch.Tensor], list[int]]:
+        """Slice latents for a context window, injecting guide frames where applicable.
+        For multimodal contexts, uses the modality-specific windows derived in prepare_window().
+        """
+        sliced = []
+        guide_frame_counts = []
+        for idx in range(len(self.latents)):
+            modality_window = window.get_window_for_modality(idx)
+            retain = retain_index_list if idx == 0 else []
+            s = modality_window.get_tensor(self.latents[idx], device, retain_index_list=retain)
+            if self.guide_entries[idx] is not None:
+                s, ng = self._inject_guide_frames(s, modality_window, modality_idx=idx)
+            else:
+                ng = 0
+            sliced.append(s)
+            guide_frame_counts.append(ng)
+        return sliced, guide_frame_counts
+
+    def strip_guide_frames(self, out_per_modality: list[list[torch.Tensor]], guide_frame_counts: list[int], window: IndexListContextWindow):
+        """Strip injected guide frames from per-cond, per-modality outputs in place."""
+        for idx in range(len(self.latents)):
+            if guide_frame_counts[idx] > 0:
+                window_len = len(window.get_window_for_modality(idx).index_list)
+                for ci in range(len(out_per_modality)):
+                    out_per_modality[ci][idx] = out_per_modality[ci][idx].narrow(self.dim, 0, window_len)
+
+    def _inject_guide_frames(self, latent_slice: torch.Tensor, window: IndexListContextWindow, modality_idx: int = 0) -> tuple[torch.Tensor, int]:
+        guide_entries = self.guide_entries[modality_idx]
+        guide_frames = self.guide_latents[modality_idx]
+        suffix_idx, overlap_info, kf_local_pos, guide_frame_count = compute_guide_overlap(guide_entries, window.index_list)
+        window.guide_frames_indices = suffix_idx
+        window.guide_overlap_info = overlap_info
+        window.guide_kf_local_positions = kf_local_pos
+
+        # Derive per-overlap-entry latent_downscale_factor from guide entry latent_shape vs guide frame spatial dims.
+        # guide_frames has full (post-dilation) spatial dims; entry["latent_shape"] has pre-dilation dims.
+        guide_downscale_factors = []
+        if guide_frame_count > 0:
+            full_H = guide_frames.shape[3]
+            for entry_idx, _ in overlap_info:
+                entry_H = guide_entries[entry_idx]["latent_shape"][1]
+                guide_downscale_factors.append(full_H // entry_H)
+        window.guide_downscale_factors = guide_downscale_factors
+
+        if guide_frame_count > 0:
+            idx = tuple([slice(None)] * self.dim + [suffix_idx])
+            return torch.cat([latent_slice, guide_frames[idx]], dim=self.dim), guide_frame_count
+        return latent_slice, 0
+
+    def patch_latent_shapes(self, sub_conds, new_shapes):
+        if not self.is_multimodal:
+            return
+
+        for cond_list in sub_conds:
+            if cond_list is None:
+                continue
+            for cond_dict in cond_list:
+                model_conds = cond_dict.get('model_conds', {})
+                if 'latent_shapes' in model_conds:
+                    model_conds['latent_shapes'] = comfy.conds.CONDConstant(new_shapes)
+
+
@dataclass
 class ContextSchedule:
    name: str
@ -150,7 +314,8 @@ class ContextFuseMethod:
 ContextResults = collections.namedtuple("ContextResults", ['window_idx', 'sub_conds_out', 'sub_conds', 'window'])
 class IndexListContextHandler(ContextHandlerABC):
    def __init__(self, context_schedule: ContextSchedule, fuse_method: ContextFuseMethod, context_length: int=1, context_overlap: int=0, context_stride: int=1,
-                 closed_loop: bool=False, dim:int=0, freenoise: bool=False, cond_retain_index_list: list[int]=[], split_conds_to_windows: bool=False):
+                 closed_loop: bool=False, dim:int=0, freenoise: bool=False, cond_retain_index_list: list[int]=[], split_conds_to_windows: bool=False,
+                 latent_retain_index_list: list[int]=[]):
        self.context_schedule = context_schedule
        self.fuse_method = fuse_method
        self.context_length = context_length
@ -162,16 +327,100 @@ class IndexListContextHandler(ContextHandlerABC):
        self.freenoise = freenoise
        self.cond_retain_index_list = [int(x.strip()) for x in cond_retain_index_list.split(",")] if cond_retain_index_list else []
        self.split_conds_to_windows = split_conds_to_windows
+        self.latent_retain_index_list = [int(x.strip()) for x in latent_retain_index_list.split(",")] if latent_retain_index_list else []

        self.callbacks = {}

+    @staticmethod
+    def _get_latent_shapes(conds):
+        for cond_list in conds:
+            if cond_list is None:
+                continue
+            for cond_dict in cond_list:
+                model_conds = cond_dict.get('model_conds', {})
+                if 'latent_shapes' in model_conds:
+                    return model_conds['latent_shapes'].cond
+        return None
+
+    @staticmethod
+    def _get_guide_entries(conds):
+        for cond_list in conds:
+            if cond_list is None:
+                continue
+            for cond_dict in cond_list:
+                model_conds = cond_dict.get('model_conds', {})
+                entries = model_conds.get('guide_attention_entries')
+                if entries is not None and hasattr(entries, 'cond') and entries.cond:
+                    return entries.cond
+        return None
+
+    def _apply_freenoise(self, noise: torch.Tensor, conds: list[list[dict]], seed: int) -> torch.Tensor:
+        """Apply FreeNoise shuffling, scaling context length/overlap per-modality by frame ratio.
+        If guide frames are present on the primary modality, only the video portion is shuffled.
+        """
+        guide_entries = self._get_guide_entries(conds)
+        guide_count = sum(e["latent_shape"][0] for e in guide_entries) if guide_entries else 0
+
+        latent_shapes = self._get_latent_shapes(conds)
+        if latent_shapes is not None and len(latent_shapes) > 1:
+            modalities = comfy.utils.unpack_latents(noise, latent_shapes)
+            primary_total = latent_shapes[0][self.dim]
+            primary_video_count = modalities[0].size(self.dim) - guide_count
+            apply_freenoise(modalities[0].narrow(self.dim, 0, primary_video_count), self.dim, self.context_length, self.context_overlap, seed)
+            for i in range(1, len(modalities)):
+                mod_total = latent_shapes[i][self.dim]
+                ratio = mod_total / primary_total if primary_total > 0 else 1
+                mod_ctx_len = max(round(self.context_length * ratio), 1)
+                mod_ctx_overlap = max(round(self.context_overlap * ratio), 0)
+                modalities[i] = apply_freenoise(modalities[i], self.dim, mod_ctx_len, mod_ctx_overlap, seed)
+            noise, _ = comfy.utils.pack_latents(modalities)
+            return noise
+        video_count = noise.size(self.dim) - guide_count
+        apply_freenoise(noise.narrow(self.dim, 0, video_count), self.dim, self.context_length, self.context_overlap, seed)
+        return noise
+
+    def _build_window_state(self, x_in: torch.Tensor, conds: list[list[dict]]) -> WindowingState:
+        """Build windowing state for the current step, including unpacking latents and extracting guide frame info from conds."""
+        latent_shapes = self._get_latent_shapes(conds)
+        is_multimodal = latent_shapes is not None and len(latent_shapes) > 1
+        unpacked_latents = comfy.utils.unpack_latents(x_in, latent_shapes) if is_multimodal else [x_in]
+
+        unpacked_latents_list = list(unpacked_latents)
+        guide_latents_list = [None] * len(unpacked_latents)
+        guide_entries_list = [None] * len(unpacked_latents)
+
+        extracted_guide_entries = self._get_guide_entries(conds)
+
+        # Strip guide frames (only from first modality for now)
+        if extracted_guide_entries is not None:
+            guide_count = sum(e["latent_shape"][0] for e in extracted_guide_entries)
+            if guide_count > 0:
+                x = unpacked_latents[0]
+                latent_count = x.size(self.dim) - guide_count
+                unpacked_latents_list[0] = x.narrow(self.dim, 0, latent_count)
+                guide_latents_list[0] = x.narrow(self.dim, latent_count, guide_count)
+                guide_entries_list[0] = extracted_guide_entries
+
+
+        return WindowingState(
+            latents=unpacked_latents_list,
+            guide_latents=guide_latents_list,
+            guide_entries=guide_entries_list,
+            latent_shapes=latent_shapes,
+            dim=self.dim,
+            is_multimodal=is_multimodal)
+
    def should_use_context(self, model: BaseModel, conds: list[list[dict]], x_in: torch.Tensor, timestep: torch.Tensor, model_options: dict[str]) -> bool:
-        # for now, assume first dim is batch - should have stored on BaseModel in actual implementation
-        if x_in.size(self.dim) > self.context_length:
-            logging.info(f"Using context windows {self.context_length} with overlap {self.context_overlap} for {x_in.size(self.dim)} frames.")
+        window_state = self._build_window_state(x_in, conds) # build window_state to check frame counts, will be built again in execute
+        total_frame_count = window_state.latents[0].size(self.dim)
+        if total_frame_count > self.context_length:
+            logging.info(f"\nUsing context windows: Context length {self.context_length} with overlap {self.context_overlap} for {total_frame_count} frames.")
            if self.cond_retain_index_list:
                logging.info(f"Retaining original cond for indexes: {self.cond_retain_index_list}")
+            if self.latent_retain_index_list:
+                logging.info(f"Retaining original latent for indexes: {self.latent_retain_index_list}")
            return True
+        logging.info(f"\nNot using context windows since context length ({self.context_length}) exceeds input frames ({total_frame_count}).")
        return False

    def prepare_control_objects(self, control: ControlBase, device=None) -> ControlBase:
@ -262,7 +511,9 @@ class IndexListContextHandler(ContextHandlerABC):
        return resized_cond

    def set_step(self, timestep: torch.Tensor, model_options: dict[str]):
-        mask = torch.isclose(model_options["transformer_options"]["sample_sigmas"], timestep[0], rtol=0.0001)
+        sample_sigmas = model_options["transformer_options"]["sample_sigmas"]
+        current_timestep = timestep[0].to(sample_sigmas.dtype)
+        mask = torch.isclose(sample_sigmas, current_timestep, rtol=0.0001)
        matches = torch.nonzero(mask)
        if torch.numel(matches) == 0:
            return  # substep from multi-step sampler: keep self._step from the last full step
@ -271,68 +522,128 @@ class IndexListContextHandler(ContextHandlerABC):
    def get_context_windows(self, model: BaseModel, x_in: torch.Tensor, model_options: dict[str]) -> list[IndexListContextWindow]:
        full_length = x_in.size(self.dim) # TODO: choose dim based on model
        context_windows = self.context_schedule.func(full_length, self, model_options)
-        context_windows = [IndexListContextWindow(window, dim=self.dim, total_frames=full_length) for window in context_windows]
+        context_windows = [IndexListContextWindow(window, dim=self.dim, total_frames=full_length, context_overlap=self.context_overlap) for window in context_windows]
        return context_windows

    def execute(self, calc_cond_batch: Callable, model: BaseModel, conds: list[list[dict]], x_in: torch.Tensor, timestep: torch.Tensor, model_options: dict[str]):
        self._model = model
        self.set_step(timestep, model_options)
-        context_windows = self.get_context_windows(model, x_in, model_options)
-        enumerated_context_windows = list(enumerate(context_windows))

-        conds_final = [torch.zeros_like(x_in) for _ in conds]
+        window_state = self._build_window_state(x_in, conds)
+        num_modalities = len(window_state.latents)
+
+        context_windows = self.get_context_windows(model, window_state.latents[0], model_options)
+        enumerated_context_windows = list(enumerate(context_windows))
+        total_windows = len(enumerated_context_windows)
+
+        # Initialize per-modality accumulators (length 1 for single-modality)
+        accum = [[torch.zeros_like(m) for _ in conds] for m in window_state.latents]
        if self.fuse_method.name == ContextFuseMethods.RELATIVE:
-            counts_final = [torch.ones(get_shape_for_dim(x_in, self.dim), device=x_in.device) for _ in conds]
+            counts = [[torch.ones(get_shape_for_dim(m, self.dim), device=m.device) for _ in conds] for m in window_state.latents]
        else:
-            counts_final = [torch.zeros(get_shape_for_dim(x_in, self.dim), device=x_in.device) for _ in conds]
-        biases_final = [([0.0] * x_in.shape[self.dim]) for _ in conds]
+            counts = [[torch.zeros(get_shape_for_dim(m, self.dim), device=m.device) for _ in conds] for m in window_state.latents]
+        biases = [[([0.0] * m.shape[self.dim]) for _ in conds] for m in window_state.latents]

        for callback in comfy.patcher_extension.get_all_callbacks(IndexListCallbacks.EXECUTE_START, self.callbacks):
            callback(self, model, x_in, conds, timestep, model_options)

+        # accumulate results from each context window
        for enum_window in enumerated_context_windows:
-            results = self.evaluate_context_windows(calc_cond_batch, model, x_in, conds, timestep, [enum_window], model_options)
+            results = self.evaluate_context_windows(
+                calc_cond_batch, model, x_in, conds, timestep, [enum_window],
+                model_options, window_state=window_state, total_windows=total_windows)
            for result in results:
-                self.combine_context_window_results(x_in, result.sub_conds_out, result.sub_conds, result.window, result.window_idx, len(enumerated_context_windows), timestep,
-                                            conds_final, counts_final, biases_final)
+                # result.sub_conds_out is per-cond, per-modality: list[list[Tensor]]
+                for mod_idx in range(num_modalities):
+                    mod_out = [result.sub_conds_out[ci][mod_idx] for ci in range(len(conds))]
+                    modality_window = result.window.get_window_for_modality(mod_idx)
+                    self.combine_context_window_results(
+                        window_state.latents[mod_idx], mod_out, result.sub_conds, modality_window,
+                        result.window_idx, total_windows, timestep,
+                        accum[mod_idx], counts[mod_idx], biases[mod_idx])
+
+        # fuse accumulated results into final conds
        try:
-            # finalize conds
-            if self.fuse_method.name == ContextFuseMethods.RELATIVE:
-                # relative is already normalized, so return as is
-                del counts_final
-                return conds_final
-            else:
-                # normalize conds via division by context usage counts
-                for i in range(len(conds_final)):
-                    conds_final[i] /= counts_final[i]
-                del counts_final
-                return conds_final
+            result_out = []
+            for ci in range(len(conds)):
+                finalized = []
+                for mod_idx in range(num_modalities):
+                    if self.fuse_method.name != ContextFuseMethods.RELATIVE:
+                        accum[mod_idx][ci] /= counts[mod_idx][ci]
+                    f = accum[mod_idx][ci]
+
+                    # if guide frames were injected, append them to the end of the fused latents for the next step
+                    if window_state.guide_latents[mod_idx] is not None:
+                        f = torch.cat([f, window_state.guide_latents[mod_idx]], dim=self.dim)
+                    finalized.append(f)
+
+                # pack modalities together if needed
+                if window_state.is_multimodal and len(finalized) > 1:
+                    packed, _ = comfy.utils.pack_latents(finalized)
+                else:
+                    packed = finalized[0]
+
+                result_out.append(packed)
+            return result_out
        finally:
            for callback in comfy.patcher_extension.get_all_callbacks(IndexListCallbacks.EXECUTE_CLEANUP, self.callbacks):
                callback(self, model, x_in, conds, timestep, model_options)

-    def evaluate_context_windows(self, calc_cond_batch: Callable, model: BaseModel, x_in: torch.Tensor, conds, timestep: torch.Tensor, enumerated_context_windows: list[tuple[int, IndexListContextWindow]],
-                                model_options, device=None, first_device=None):
+    def evaluate_context_windows(self, calc_cond_batch: Callable, model: BaseModel, x_in: torch.Tensor, conds,
+                                timestep: torch.Tensor, enumerated_context_windows: list[tuple[int, IndexListContextWindow]],
+                                model_options, window_state: WindowingState, total_windows: int = None,
+                                device=None, first_device=None):
+        """Evaluate context windows and return per-cond, per-modality outputs in ContextResults.sub_conds_out
+
+        For each window:
+        1. Builds windows (for each modality if multimodal)
+        2. Slices window for each modality
+        3. Injects concatenated latent guide frames where present
+        4. Packs together if needed and calls model
+        5. Unpacks and strips any guides from outputs
+        """
+        x = window_state.latents[0]
+
        results: list[ContextResults] = []
        for window_idx, window in enumerated_context_windows:
            # allow processing to end between context window executions for faster Cancel
            comfy.model_management.throw_exception_if_processing_interrupted()

+            # prepare the window accounting for multimodal windows
+            window = window_state.prepare_window(window, model)
+
+            # slice the window for each modality, injecting guide frames where applicable
+            sliced, guide_frame_counts_per_modality = window_state.slice_for_window(window, self.latent_retain_index_list, device)
+
            for callback in comfy.patcher_extension.get_all_callbacks(IndexListCallbacks.EVALUATE_CONTEXT_WINDOWS, self.callbacks):
                callback(self, model, x_in, conds, timestep, model_options, window_idx, window, model_options, device, first_device)

-            # update exposed params
-            model_options["transformer_options"]["context_window"] = window
-            # get subsections of x, timestep, conds
-            sub_x = window.get_tensor(x_in, device)
-            sub_timestep = window.get_tensor(timestep, device, dim=0)
-            sub_conds = [self.get_resized_cond(cond, x_in, window, device) for cond in conds]
+            logging.info(f"Context window {window_idx + 1}/{total_windows or len(enumerated_context_windows)}: frames {window.index_list[0]}-{window.index_list[-1]} of {x.shape[self.dim]}"
+                         + (f" (+{guide_frame_counts_per_modality[0]} guide frames)" if guide_frame_counts_per_modality[0] > 0 else "")
+                         )

+            # if multimodal, pack modalities together
+            if window_state.is_multimodal and len(sliced) > 1:
+                sub_x, sub_shapes = comfy.utils.pack_latents(sliced)
+            else:
+                sub_x, sub_shapes = sliced[0], [sliced[0].shape]
+
+            # get resized conds for window
+            model_options["transformer_options"]["context_window"] = window
+            sub_timestep = window.get_tensor(timestep, dim=0)
+            sub_conds = [self.get_resized_cond(cond, x, window) for cond in conds]
+
+            # if multimodal, patch latent_shapes in conds for correct unpacking in model
+            window_state.patch_latent_shapes(sub_conds, sub_shapes)
+
+            # call model on window
            sub_conds_out = calc_cond_batch(model, sub_conds, sub_x, sub_timestep, model_options)
-            if device is not None:
-                for i in range(len(sub_conds_out)):
-                    sub_conds_out[i] = sub_conds_out[i].to(x_in.device)
-            results.append(ContextResults(window_idx, sub_conds_out, sub_conds, window))
+
+            # unpack outputs and strip guide frames
+            out_per_modality = [comfy.utils.unpack_latents(sub_conds_out[i], sub_shapes) for i in range(len(sub_conds_out))]
+            window_state.strip_guide_frames(out_per_modality, guide_frame_counts_per_modality, window)
+
+            results.append(ContextResults(window_idx, out_per_modality, sub_conds, window))
        return results


@ -356,7 +667,7 @@ class IndexListContextHandler(ContextHandlerABC):
                    biases_final[i][idx] = bias_total + bias
        else:
            # add conds and counts based on weights of fuse method
-            weights = get_context_weights(window.context_length, x_in.shape[self.dim], window.index_list, self, sigma=timestep)
+            weights = get_context_weights(window.context_length, x_in.shape[self.dim], window.index_list, self, sigma=timestep, context_overlap=window.context_overlap)
            weights_tensor = match_weights_to_dim(weights, x_in, self.dim, device=x_in.device)
            for i in range(len(sub_conds_out)):
                window.add_window(conds_final[i], sub_conds_out[i] * weights_tensor)
@ -366,16 +677,22 @@ class IndexListContextHandler(ContextHandlerABC):
            callback(self, x_in, sub_conds_out, sub_conds, window, window_idx, total_windows, timestep, conds_final, counts_final, biases_final)


-def _prepare_sampling_wrapper(executor, model, noise_shape: torch.Tensor, *args, **kwargs):
-    # limit noise_shape length to context_length for more accurate vram use estimation
+def _prepare_sampling_wrapper(executor, model, noise_shape: torch.Tensor, conds, *args, **kwargs):
+    # Scale noise_shape to a single context window so VRAM estimation budgets per-window.
+    #
+    # Wraps `executor`: reads `model_options` from kwargs (raises Exception when absent),
+    # optionally shrinks one entry of noise_shape, then forwards everything to `executor`.
+    # When no "context_handler" is present in model_options, noise_shape is passed through
+    # untouched; otherwise it is forwarded as a list copy.
     model_options = kwargs.get("model_options", None)
     if model_options is None:
         raise Exception("model_options not found in prepare_sampling_wrapper; this should never happen, something went wrong.")
     handler: IndexListContextHandler = model_options.get("context_handler", None)
     if handler is not None:
         noise_shape = list(noise_shape)
-        noise_shape[handler.dim] = min(noise_shape[handler.dim], handler.context_length)
-    return executor(model, noise_shape, *args, **kwargs)
+        # Heuristic: a 3-entry shape whose middle entry is 1 is treated as a packed latent
+        # (presumably a flattened multimodal latent -- confirm against the packing code).
+        is_packed = len(noise_shape) == 3 and noise_shape[1] == 1
+        if is_packed:
+            # TODO: latent_shapes cond isn't attached yet at this point, so we can't compute a
+            # per-window flat latent here. Skipping the clamp over-estimates but prevents immediate OOM.
+            pass
+        elif handler.dim < len(noise_shape) and noise_shape[handler.dim] > handler.context_length:
+            # NOTE(review): the guard above already ensures the entry exceeds context_length,
+            # so min() always yields handler.context_length here.
+            noise_shape[handler.dim] = min(noise_shape[handler.dim], handler.context_length)
+    return executor(model, noise_shape, conds, *args, **kwargs)


 def create_prepare_sampling_wrapper(model: ModelPatcher):
@ -395,11 +712,12 @@ def _sampler_sample_wrapper(executor, guider, sigmas, extra_args, callback, nois
        raise Exception("context_handler not found in sampler_sample_wrapper; this should never happen, something went wrong.")
    if not handler.freenoise:
        return executor(guider, sigmas, extra_args, callback, noise, *args, **kwargs)
-    noise = apply_freenoise(noise, handler.dim, handler.context_length, handler.context_overlap, extra_args["seed"])
+
+    conds = [guider.conds.get('positive', guider.conds.get('negative', []))]
+    noise = handler._apply_freenoise(noise, conds, extra_args["seed"])

    return executor(guider, sigmas, extra_args, callback, noise, *args, **kwargs)

-
 def create_sampler_sample_wrapper(model: ModelPatcher):
    model.add_wrapper_with_key(
        comfy.patcher_extension.WrappersMP.SAMPLER_SAMPLE,
@ -407,7 +725,6 @@ def create_sampler_sample_wrapper(model: ModelPatcher):
        _sampler_sample_wrapper
    )

-
 def match_weights_to_dim(weights: list[float], x_in: torch.Tensor, dim: int, device=None) -> torch.Tensor:
    total_dims = len(x_in.shape)
    weights_tensor = torch.Tensor(weights).to(device=device)
@ -553,8 +870,9 @@ def get_matching_context_schedule(context_schedule: str) -> ContextSchedule:
    return ContextSchedule(context_schedule, func)


-def get_context_weights(length: int, full_length: int, idxs: list[int], handler: IndexListContextHandler, sigma: torch.Tensor=None):
-    return handler.fuse_method.func(length, sigma=sigma, handler=handler, full_length=full_length, idxs=idxs)
+def get_context_weights(length: int, full_length: int, idxs: list[int], handler: IndexListContextHandler, sigma: torch.Tensor=None, context_overlap: int=None):
+    """Return the fuse weights for one window by delegating to ``handler.fuse_method.func``.
+
+    ``context_overlap`` is forwarded to the fuse function; when it is None the
+    handler's own ``context_overlap`` is used instead.
+    """
+    if context_overlap is None:
+        # no per-window override supplied: use the handler-wide overlap
+        context_overlap = handler.context_overlap
+    fuse_func = handler.fuse_method.func
+    return fuse_func(length, sigma=sigma, handler=handler, full_length=full_length, idxs=idxs, context_overlap=context_overlap)


 def create_weights_flat(length: int, **kwargs) -> list[float]:
@ -572,18 +890,18 @@ def create_weights_pyramid(length: int, **kwargs) -> list[float]:
        weight_sequence = list(range(1, max_weight, 1)) + [max_weight] + list(range(max_weight - 1, 0, -1))
    return weight_sequence

-def create_weights_overlap_linear(length: int, full_length: int, idxs: list[int], handler: IndexListContextHandler, **kwargs):
+def create_weights_overlap_linear(length: int, full_length: int, idxs: list[int], context_overlap: int, **kwargs):
+    """Build per-position fuse weights that ramp linearly across the window overlap.
+
+    Args:
+        length: number of positions in the window; the returned tensor has this size.
+        full_length: total number of positions the windows cover.
+        idxs: positions (in the full sequence) that this window covers.
+        context_overlap: number of positions at each window edge that are blended.
+        **kwargs: extra keyword arguments passed by the caller; ignored here.
+
+    Returns:
+        A 1-D tensor of ``length`` weights: ones, with a ramp-up at the start unless
+        the window contains position 0, and a ramp-down at the end unless it contains
+        the last position.
+    """
     # based on code in Kijai's WanVideoWrapper: https://github.com/kijai/ComfyUI-WanVideoWrapper/blob/dbb2523b37e4ccdf45127e5ae33e31362f755c8e/nodes.py#L1302
     # only expected overlap is given different weights
     weights_torch = torch.ones((length))
+    # Clamp the overlap to [0, length]. An overlap of 0 must be a no-op: `[-0:]` selects
+    # the WHOLE tensor, so assigning an empty ramp to it raises a shape error. An overlap
+    # larger than the window would fail the same way because the ramp outgrows the slice.
+    overlap = max(0, min(context_overlap, length))
+    if overlap == 0:
+        return weights_torch
     # blend left-side on all except first window
     if min(idxs) > 0:
-        ramp_up = torch.linspace(1e-37, 1, handler.context_overlap)
-        weights_torch[:handler.context_overlap] = ramp_up
+        ramp_up = torch.linspace(1e-37, 1, overlap)
+        weights_torch[:overlap] = ramp_up
     # blend right-side on all except last window
     if max(idxs) < full_length-1:
-        ramp_down = torch.linspace(1, 1e-37, handler.context_overlap)
-        weights_torch[-handler.context_overlap:] = ramp_down
+        ramp_down = torch.linspace(1, 1e-37, overlap)
+        weights_torch[-overlap:] = ramp_down
     return weights_torch

 class ContextFuseMethods:
--- a/comfy/ldm/lightricks/model.py
+++ b/comfy/ldm/lightricks/model.py
@ -1028,7 +1028,7 @@ class LTXVModel(LTXBaseModel):
        )

        grid_mask = None
-        if keyframe_idxs is not None:
+        if keyframe_idxs is not None and keyframe_idxs.shape[2] > 0:
            additional_args.update({ "orig_patchified_shape": list(x.shape)})
            denoise_mask = self.patchifier.patchify(denoise_mask)[0]
            grid_mask = ~torch.any(denoise_mask < 0, dim=-1)[0]
@ -1315,7 +1315,7 @@ class LTXVModel(LTXBaseModel):
        x = x * (1 + scale) + shift
        x = self.proj_out(x)

-        if keyframe_idxs is not None:
+        if keyframe_idxs is not None and keyframe_idxs.shape[2] > 0:
            grid_mask = kwargs["grid_mask"]
            orig_patchified_shape = kwargs["orig_patchified_shape"]
            full_x = torch.zeros(orig_patchified_shape, dtype=x.dtype, device=x.device)
--- a/comfy/model_base.py
+++ b/comfy/model_base.py
@ -21,6 +21,7 @@ import comfy.ldm.hunyuan3dv2_1.hunyuandit
 import torch
 import logging
 import comfy.ldm.lightricks.av_model
+import comfy.ldm.lightricks.symmetric_patchifier
 import comfy.context_windows
 from comfy.ldm.modules.diffusionmodules.openaimodel import UNetModel, Timestep
 from comfy.ldm.cascade.stage_c import StageC
@ -1088,6 +1089,123 @@ class LTXAV(BaseModel):
    def scale_latent_inpaint(self, sigma, noise, latent_image, **kwargs):
+        # Returns latent_image as-is; sigma, noise and any extra kwargs are ignored.
        return latent_image

+    def map_context_window_to_modalities(self, primary_indices, latent_shapes, dim):
+        """Translate a primary-modality window into index lists for every modality.
+
+        The first element of the returned list is ``primary_indices`` itself. For each
+        further entry of ``latent_shapes`` an index list is built by scaling every
+        primary index to that modality's length along ``dim``. With fewer than two
+        shapes only the primary indices are returned.
+        """
+        mapped = [primary_indices]
+        if len(latent_shapes) < 2:
+            return mapped
+
+        primary_len = latent_shapes[0][dim]
+
+        for shape in latent_shapes[1:]:
+            modality_len = shape[dim]
+            # Each primary index covers a proportional [lo, hi) span of the modality.
+            # Spans are appended in primary order, so wrapped/strided windows keep their
+            # geometry; dict keys give order-preserving de-duplication.
+            ordered = {}
+            for p_idx in primary_indices:
+                lo = min(int(round(p_idx * modality_len / primary_len)), modality_len - 1)
+                hi = min(int(round((p_idx + 1) * modality_len / primary_len)), modality_len)
+                # every primary index contributes at least one modality index
+                hi = max(hi, lo + 1)
+                for m_idx in range(lo, hi):
+                    ordered.setdefault(m_idx, None)
+            mapped.append(list(ordered))
+
+        return mapped
+
+    @staticmethod
+    def _get_guide_entries(conds):
+        """Return the first non-empty ``guide_attention_entries`` payload found in ``conds``.
+
+        ``conds`` is an iterable of cond lists (None entries are skipped). Each cond
+        dict is checked for ``model_conds['guide_attention_entries']``; the first one
+        with a truthy ``cond`` attribute wins. Returns None when nothing matches.
+        """
+        for group in conds:
+            if group is None:
+                continue
+            for cond in group:
+                candidate = cond.get('model_conds', {}).get('guide_attention_entries')
+                if candidate is None:
+                    continue
+                payload = getattr(candidate, 'cond', None)
+                if payload:
+                    return payload
+        return None
+
+    def resize_cond_for_context_window(self, cond_key, cond_value, window, x_in, device, retain_index_list=[]):
+        """Resize selected cond entries so they match a single context window.
+
+        Handles the keys "audio_denoise_mask", "denoise_mask", "keyframe_idxs" and
+        "guide_attention_entries". Returns a copy of ``cond_value`` (via ``_copy_with``)
+        holding the resized payload, or None when the key is not handled or a handled
+        key does not meet its branch's preconditions (presumably so the caller falls
+        back to its generic resizing -- verify against the caller).
+
+        ``window`` must expose ``dim``, ``index_list``, ``get_tensor`` and the guide_*
+        attributes read below; ``modality_windows`` is optional. ``x_in`` is indexed at
+        ``shape[3]`` / ``shape[4]`` for H and W, so it needs at least five dimensions
+        for the keyframe and guide-entry branches. ``retain_index_list`` is only
+        forwarded to ``window.get_tensor`` for the video part of the denoise mask.
+        """
+        # Audio denoise mask — slice using audio modality window
+        if cond_key == "audio_denoise_mask" and hasattr(window, 'modality_windows') and window.modality_windows:
+            audio_window = window.modality_windows.get(1)
+            if audio_window is not None and hasattr(cond_value, "cond") and isinstance(cond_value.cond, torch.Tensor):
+                sliced = audio_window.get_tensor(cond_value.cond, device, dim=2)
+                return cond_value._copy_with(sliced)
+
+        # Video denoise mask — split into video + guide portions, slice each
+        if cond_key == "denoise_mask" and hasattr(cond_value, "cond") and isinstance(cond_value.cond, torch.Tensor):
+            cond_tensor = cond_value.cond
+            # anything beyond x_in's length along window.dim is treated as appended guide frames
+            guide_count = cond_tensor.size(window.dim) - x_in.size(window.dim)
+            if guide_count > 0:
+                T_video = x_in.size(window.dim)
+                video_mask = cond_tensor.narrow(window.dim, 0, T_video)
+                guide_mask = cond_tensor.narrow(window.dim, T_video, guide_count)
+                sliced_video = window.get_tensor(video_mask, device, retain_index_list=retain_index_list)
+                suffix_indices = window.guide_frames_indices
+                if suffix_indices:
+                    # full slices for the leading dims, then pick the guide frames along window.dim
+                    idx = tuple([slice(None)] * window.dim + [suffix_indices])
+                    sliced_guide = guide_mask[idx].to(device)
+                    return cond_value._copy_with(torch.cat([sliced_video, sliced_guide], dim=window.dim))
+                else:
+                    return cond_value._copy_with(sliced_video)
+
+        # Keyframe indices — regenerate pixel coords for window, select guide positions
+        if cond_key == "keyframe_idxs":
+            kf_local_pos = window.guide_kf_local_positions
+            if not kf_local_pos:
+                return cond_value._copy_with(cond_value.cond[:, :, :0, :])  # empty
+            H, W = x_in.shape[3], x_in.shape[4]
+            window_len = len(window.index_list)
+            patchifier = self.diffusion_model.patchifier
+            latent_coords = patchifier.get_latent_coords(window_len, H, W, 1, cond_value.cond.device)
+            scale_factors = self.diffusion_model.vae_scale_factors
+            pixel_coords = comfy.ldm.lightricks.symmetric_patchifier.latent_to_pixel_coords(
+                latent_coords,
+                scale_factors,
+                causal_fix=self.diffusion_model.causal_temporal_positioning)
+            # assumes the patchifier emits H*W tokens per latent frame, frame-major -- TODO confirm
+            tokens = []
+            for pos in kf_local_pos:
+                tokens.extend(range(pos * H * W, (pos + 1) * H * W))
+            pixel_coords = pixel_coords[:, :, tokens, :]
+
+            # Adjust spatial end positions for dilated (downscaled) guides.
+            # Each guide entry may have a different downscale factor; expand the
+            # per-entry factor to cover all tokens belonging to that entry.
+            downscale_factors = window.guide_downscale_factors
+            overlap_info = window.guide_overlap_info
+            if downscale_factors:
+                per_token_factor = []
+                for (entry_idx, overlap_count), dsf in zip(overlap_info, downscale_factors):
+                    per_token_factor.extend([dsf] * (overlap_count * H * W))
+                # NOTE(review): the in-place add below only broadcasts if len(per_token_factor)
+                # equals len(tokens), i.e. the overlap counts sum to len(kf_local_pos) -- confirm.
+                factor_tensor = torch.tensor(per_token_factor, device=pixel_coords.device, dtype=pixel_coords.dtype)
+                spatial_end_offset = (factor_tensor.unsqueeze(0).unsqueeze(0).unsqueeze(-1) - 1) * torch.tensor(
+                    scale_factors[1:], device=pixel_coords.device, dtype=pixel_coords.dtype,
+                ).view(1, -1, 1, 1)
+                pixel_coords[:, 1:, :, 1:] += spatial_end_offset
+
+            # match the batch size of the original cond
+            B = cond_value.cond.shape[0]
+            if B > 1:
+                pixel_coords = pixel_coords.expand(B, -1, -1, -1)
+            return cond_value._copy_with(pixel_coords)
+
+        # Guide attention entries — adjust per-guide counts based on window overlap
+        if cond_key == "guide_attention_entries":
+            overlap_info = window.guide_overlap_info
+            H, W = x_in.shape[3], x_in.shape[4]
+            new_entries = []
+            for entry_idx, overlap_count in overlap_info:
+                e = cond_value.cond[entry_idx]
+                # copy the entry, overriding only the two window-dependent fields
+                new_entries.append({**e,
+                    "pre_filter_count": overlap_count * H * W,
+                    "latent_shape": [overlap_count, H, W]})
+            return cond_value._copy_with(new_entries)
+
+        return None
+
 class HunyuanVideo(BaseModel):
    def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.hunyuan_video.model.HunyuanVideo)
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@ -112,10 +112,6 @@ if args.directml is not None:
    # torch_directml.disable_tiled_resources(True)
    lowvram_available = False #TODO: need to find a way to get free memory in directml before this can be enabled by default.

-try:
-    import intel_extension_for_pytorch as ipex  # noqa: F401
-except:
-    pass

 try:
    _ = torch.xpu.device_count()
@ -583,9 +579,6 @@ class LoadedModel:

        real_model = self.model.model

-        if is_intel_xpu() and not args.disable_ipex_optimize and 'ipex' in globals() and real_model is not None:
-            with torch.no_grad():
-                real_model = ipex.optimize(real_model.eval(), inplace=True, graph_mode=True, concat_linear=True)

        self.real_model = weakref.ref(real_model)
        self.model_finalizer = weakref.finalize(real_model, cleanup_models)
@ -1581,10 +1574,7 @@ def should_use_fp16(device=None, model_params=0, prioritize_performance=True, ma
        return False

    if is_intel_xpu():
-        if torch_version_numeric < (2, 3):
-            return True
-        else:
-            return torch.xpu.get_device_properties(device).has_fp16
+        return torch.xpu.get_device_properties(device).has_fp16

    if is_ascend_npu():
        return True
@ -1650,10 +1640,7 @@ def should_use_bf16(device=None, model_params=0, prioritize_performance=True, ma
        return False

    if is_intel_xpu():
-        if torch_version_numeric < (2, 3):
-            return True
-        else:
-            return torch.xpu.is_bf16_supported()
+        return torch.xpu.is_bf16_supported()

    if is_ascend_npu():
        return True
@ -1784,6 +1771,7 @@ def soft_empty_cache(force=False):
    if cpu_state == CPUState.MPS:
        torch.mps.empty_cache()
    elif is_intel_xpu():
+        torch.xpu.synchronize()
        torch.xpu.empty_cache()
    elif is_ascend_npu():
        torch.npu.empty_cache()
--- a/comfy_api_nodes/nodes_bytedance.py
+++ b/comfy_api_nodes/nodes_bytedance.py
@ -1403,7 +1403,6 @@ class ByteDance2TextToVideoNode(IO.ComfyNode):
            status_extractor=lambda r: r.status,
            price_extractor=_seedance2_price_extractor(model_id, has_video_input=False),
            poll_interval=9,
-            max_poll_attempts=180,
        )
        return IO.NodeOutput(await download_url_to_video_output(response.content.video_url))

@ -1585,7 +1584,6 @@ class ByteDance2FirstLastFrameNode(IO.ComfyNode):
            status_extractor=lambda r: r.status,
            price_extractor=_seedance2_price_extractor(model_id, has_video_input=False),
            poll_interval=9,
-            max_poll_attempts=180,
        )
        return IO.NodeOutput(await download_url_to_video_output(response.content.video_url))

@ -1907,7 +1905,6 @@ class ByteDance2ReferenceNode(IO.ComfyNode):
            status_extractor=lambda r: r.status,
            price_extractor=_seedance2_price_extractor(model_id, has_video_input=has_video_input),
            poll_interval=9,
-            max_poll_attempts=180,
        )
        return IO.NodeOutput(await download_url_to_video_output(response.content.video_url))

--- a/comfy_api_nodes/nodes_hitpaw.py
+++ b/comfy_api_nodes/nodes_hitpaw.py
@ -178,7 +178,6 @@ class HitPawGeneralImageEnhance(IO.ComfyNode):
            status_extractor=lambda x: x.data.status,
            price_extractor=lambda x: request_price,
            poll_interval=10.0,
-            max_poll_attempts=480,
        )
        return IO.NodeOutput(await download_url_to_image_tensor(final_response.data.res_url))

@ -324,7 +323,6 @@ class HitPawVideoEnhance(IO.ComfyNode):
            status_extractor=lambda x: x.data.status,
            price_extractor=lambda x: request_price,
            poll_interval=10.0,
-            max_poll_attempts=320,
        )
        return IO.NodeOutput(await download_url_to_video_output(final_response.data.res_url))

--- a/comfy_api_nodes/nodes_kling.py
+++ b/comfy_api_nodes/nodes_kling.py
@ -276,7 +276,6 @@ async def finish_omni_video_task(cls: type[IO.ComfyNode], response: TaskStatusRe
        cls,
        ApiEndpoint(path=f"/proxy/kling/v1/videos/omni-video/{response.data.task_id}"),
        response_model=TaskStatusResponse,
-        max_poll_attempts=280,
        status_extractor=lambda r: (r.data.task_status if r.data else None),
    )
    return IO.NodeOutput(await download_url_to_video_output(final_response.data.task_result.videos[0].url))
@ -3062,7 +3061,6 @@ class KlingVideoNode(IO.ComfyNode):
            cls,
            ApiEndpoint(path=poll_path),
            response_model=TaskStatusResponse,
-            max_poll_attempts=280,
            status_extractor=lambda r: (r.data.task_status if r.data else None),
        )
        return IO.NodeOutput(await download_url_to_video_output(final_response.data.task_result.videos[0].url))
@ -3188,7 +3186,6 @@ class KlingFirstLastFrameNode(IO.ComfyNode):
            cls,
            ApiEndpoint(path=f"/proxy/kling/v1/videos/image2video/{response.data.task_id}"),
            response_model=TaskStatusResponse,
-            max_poll_attempts=280,
            status_extractor=lambda r: (r.data.task_status if r.data else None),
        )
        return IO.NodeOutput(await download_url_to_video_output(final_response.data.task_result.videos[0].url))
--- a/comfy_api_nodes/nodes_magnific.py
+++ b/comfy_api_nodes/nodes_magnific.py
@ -230,7 +230,6 @@ class MagnificImageUpscalerCreativeNode(IO.ComfyNode):
            status_extractor=lambda x: x.status,
            price_extractor=lambda _: price_usd,
            poll_interval=10.0,
-            max_poll_attempts=480,
        )
        return IO.NodeOutput(await download_url_to_image_tensor(final_response.generated[0]))

@ -391,7 +390,6 @@ class MagnificImageUpscalerPreciseV2Node(IO.ComfyNode):
            status_extractor=lambda x: x.status,
            price_extractor=lambda _: price_usd,
            poll_interval=10.0,
-            max_poll_attempts=480,
        )
        return IO.NodeOutput(await download_url_to_image_tensor(final_response.generated[0]))

@ -541,7 +539,6 @@ class MagnificImageStyleTransferNode(IO.ComfyNode):
            response_model=TaskResponse,
            status_extractor=lambda x: x.status,
            poll_interval=10.0,
-            max_poll_attempts=480,
        )
        return IO.NodeOutput(await download_url_to_image_tensor(final_response.generated[0]))

@ -782,7 +779,6 @@ class MagnificImageRelightNode(IO.ComfyNode):
            response_model=TaskResponse,
            status_extractor=lambda x: x.status,
            poll_interval=10.0,
-            max_poll_attempts=480,
        )
        return IO.NodeOutput(await download_url_to_image_tensor(final_response.generated[0]))

@ -924,7 +920,6 @@ class MagnificImageSkinEnhancerNode(IO.ComfyNode):
            response_model=TaskResponse,
            status_extractor=lambda x: x.status,
            poll_interval=10.0,
-            max_poll_attempts=480,
        )
        return IO.NodeOutput(await download_url_to_image_tensor(final_response.generated[0]))

--- a/comfy_api_nodes/nodes_topaz.py
+++ b/comfy_api_nodes/nodes_topaz.py
@ -453,7 +453,6 @@ class TopazVideoEnhance(IO.ComfyNode):
            progress_extractor=lambda x: getattr(x, "progress", 0),
            price_extractor=lambda x: (x.estimates.cost[0] * 0.08 if x.estimates and x.estimates.cost[0] else None),
            poll_interval=10.0,
-            max_poll_attempts=320,
        )
        return IO.NodeOutput(await download_url_to_video_output(final_response.download.url))

--- a/comfy_api_nodes/nodes_vidu.py
+++ b/comfy_api_nodes/nodes_vidu.py
@ -38,7 +38,7 @@ async def execute_task(
    cls: type[IO.ComfyNode],
    vidu_endpoint: str,
    payload: TaskCreationRequest | TaskExtendCreationRequest | TaskMultiFrameCreationRequest,
-    max_poll_attempts: int = 320,
+    max_poll_attempts: int = 480,
 ) -> list[TaskResult]:
    task_creation_response = await sync_op(
        cls,
@ -1097,7 +1097,6 @@ class ViduExtendVideoNode(IO.ComfyNode):
                video_url=await upload_video_to_comfyapi(cls, video, wait_label="Uploading video"),
                images=[image_url] if image_url else None,
            ),
-            max_poll_attempts=480,
        )
        return IO.NodeOutput(await download_url_to_video_output(results[0].url))

--- a/comfy_api_nodes/nodes_wan.py
+++ b/comfy_api_nodes/nodes_wan.py
@ -818,7 +818,6 @@ class WanReferenceVideoApi(IO.ComfyNode):
            response_model=VideoTaskStatusResponse,
            status_extractor=lambda x: x.output.task_status,
            poll_interval=6,
-            max_poll_attempts=280,
        )
        return IO.NodeOutput(await download_url_to_video_output(response.output.video_url))

--- a/comfy_api_nodes/nodes_wavespeed.py
+++ b/comfy_api_nodes/nodes_wavespeed.py
@ -84,7 +84,6 @@ class WavespeedFlashVSRNode(IO.ComfyNode):
            response_model=TaskResultResponse,
            status_extractor=lambda x: "failed" if x.data is None else x.data.status,
            poll_interval=10.0,
-            max_poll_attempts=480,
        )
        if final_response.code != 200:
            raise ValueError(
@ -156,7 +155,6 @@ class WavespeedImageUpscaleNode(IO.ComfyNode):
            response_model=TaskResultResponse,
            status_extractor=lambda x: "failed" if x.data is None else x.data.status,
            poll_interval=10.0,
-            max_poll_attempts=480,
        )
        if final_response.code != 200:
            raise ValueError(
--- a/comfy_api_nodes/util/client.py
+++ b/comfy_api_nodes/util/client.py
@ -148,7 +148,7 @@ async def poll_op(
    queued_statuses: list[str | int] | None = None,
    data: BaseModel | None = None,
    poll_interval: float = 5.0,
-    max_poll_attempts: int = 160,
+    max_poll_attempts: int = 480,
    timeout_per_poll: float = 120.0,
    max_retries_per_poll: int = 10,
    retry_delay_per_poll: float = 1.0,
@ -254,7 +254,7 @@ async def poll_op_raw(
    queued_statuses: list[str | int] | None = None,
    data: dict[str, Any] | BaseModel | None = None,
    poll_interval: float = 5.0,
-    max_poll_attempts: int = 160,
+    max_poll_attempts: int = 480,
    timeout_per_poll: float = 120.0,
    max_retries_per_poll: int = 10,
    retry_delay_per_poll: float = 1.0,
--- a/comfy_extras/nodes_context_windows.py
+++ b/comfy_extras/nodes_context_windows.py
@ -14,21 +14,22 @@ class ContextWindowsManualNode(io.ComfyNode):
            description="Manually set context windows.",
            inputs=[
                io.Model.Input("model", tooltip="The model to apply context windows to during sampling."),
-                io.Int.Input("context_length", min=1, default=16, tooltip="The length of the context window.", advanced=True),
-                io.Int.Input("context_overlap", min=0, default=4, tooltip="The overlap of the context window.", advanced=True),
+                io.Int.Input("context_length", min=1, default=16, tooltip="The length of the context window."),
+                io.Int.Input("context_overlap", min=0, default=4, tooltip="The overlap of the context window."),
                io.Combo.Input("context_schedule", options=[
                    comfy.context_windows.ContextSchedules.STATIC_STANDARD,
                    comfy.context_windows.ContextSchedules.UNIFORM_STANDARD,
                    comfy.context_windows.ContextSchedules.UNIFORM_LOOPED,
                    comfy.context_windows.ContextSchedules.BATCHED,
-                    ], tooltip="The stride of the context window."),
-                io.Int.Input("context_stride", min=1, default=1, tooltip="The stride of the context window; only applicable to uniform schedules.", advanced=True),
+                    ], default=comfy.context_windows.ContextSchedules.STATIC_STANDARD, tooltip="Step-dependent scheduling algorithm for context windows."),
+                io.Int.Input("context_stride", min=1, default=1, tooltip="The stride of the context window; only applicable to uniform schedules."),
                io.Boolean.Input("closed_loop", default=False, tooltip="Whether to close the context window loop; only applicable to looped schedules."),
                io.Combo.Input("fuse_method", options=comfy.context_windows.ContextFuseMethods.LIST_STATIC, default=comfy.context_windows.ContextFuseMethods.PYRAMID, tooltip="The method to use to fuse the context windows."),
                io.Int.Input("dim", min=0, max=5, default=0, tooltip="The dimension to apply the context windows to."),
                io.Boolean.Input("freenoise", default=False, tooltip="Whether to apply FreeNoise noise shuffling, improves window blending."),
-                io.String.Input("cond_retain_index_list", default="", tooltip="List of latent indices to retain in the conditioning tensors for each window, for example setting this to '0' will use the initial start image for each window."),
+                io.String.Input("cond_retain_index_list", default="", tooltip="List of latent indices to retain in the conditioning tensors for each window. For concat-style I2V models (e.g. Wan I2V, HunyuanVideo I2V, Cosmos I2V, SVD) the encoded start image lives in the c_concat conditioning channels; setting this to '0' will retain that start image content at sub-pos 0 of every window."),
                io.Boolean.Input("split_conds_to_windows", default=False, tooltip="Whether to split multiple conditionings (created by ConditionCombine) to each window based on region index."),
+                io.String.Input("latent_retain_index_list", default="", tooltip="List of latent indices to retain in the noise latent itself for each window. Use for workflows where reference content (e.g. a start image) lives directly in the noise latent rather than in separate conditioning channels (e.g. inplace-style I2V like LTXV, AnimateDiff). Independent of cond_retain_index_list."),
            ],
            outputs=[
                io.Model.Output(tooltip="The model with context windows applied during sampling."),
@ -38,7 +39,7 @@ class ContextWindowsManualNode(io.ComfyNode):

    @classmethod
    def execute(cls, model: io.Model.Type, context_length: int, context_overlap: int, context_schedule: str, context_stride: int, closed_loop: bool, fuse_method: str, dim: int, freenoise: bool,
-                cond_retain_index_list: list[int]=[], split_conds_to_windows: bool=False) -> io.Model:
+                cond_retain_index_list: list[int]=[],  split_conds_to_windows: bool=False, latent_retain_index_list: list[int]=[]) -> io.Model:
        model = model.clone()
        model.model_options["context_handler"] = comfy.context_windows.IndexListContextHandler(
            context_schedule=comfy.context_windows.get_matching_context_schedule(context_schedule),
@ -50,7 +51,8 @@ class ContextWindowsManualNode(io.ComfyNode):
            dim=dim,
            freenoise=freenoise,
            cond_retain_index_list=cond_retain_index_list,
-            split_conds_to_windows=split_conds_to_windows
+            split_conds_to_windows=split_conds_to_windows,
+            latent_retain_index_list=latent_retain_index_list
        )
        # make memory usage calculation only take into account the context window latents
        comfy.context_windows.create_prepare_sampling_wrapper(model)
@ -63,33 +65,70 @@ class WanContextWindowsManualNode(ContextWindowsManualNode):
    def define_schema(cls) -> io.Schema:
        schema = super().define_schema()
        schema.node_id = "WanContextWindowsManual"
-        schema.display_name = "WAN Context Windows (Manual)"
-        schema.description = "Manually set context windows for WAN-like models (dim=2)."
+        schema.display_name = "Wan Context Windows"
+        schema.description = "Set context windows for Wan-like models."
        schema.inputs = [
            io.Model.Input("model", tooltip="The model to apply context windows to during sampling."),
-                io.Int.Input("context_length", min=1, max=nodes.MAX_RESOLUTION, step=4, default=81, tooltip="The length of the context window.", advanced=True),
-                io.Int.Input("context_overlap", min=0, default=30, tooltip="The overlap of the context window.", advanced=True),
+                io.Int.Input("context_length", min=1, max=nodes.MAX_RESOLUTION, step=4, default=81, tooltip="The length of the context window in real frames. Must be 4*n + 1."),
+                io.Int.Input("context_overlap", min=0, default=30, tooltip="The overlap of the context window in real frames."),
                io.Combo.Input("context_schedule", options=[
                    comfy.context_windows.ContextSchedules.STATIC_STANDARD,
                    comfy.context_windows.ContextSchedules.UNIFORM_STANDARD,
                    comfy.context_windows.ContextSchedules.UNIFORM_LOOPED,
                    comfy.context_windows.ContextSchedules.BATCHED,
-                    ], tooltip="The stride of the context window."),
+                    ], default=comfy.context_windows.ContextSchedules.UNIFORM_STANDARD, tooltip="Step-dependent scheduling algorithm for context windows."),
                io.Int.Input("context_stride", min=1, default=1, tooltip="The stride of the context window; only applicable to uniform schedules.", advanced=True),
-                io.Boolean.Input("closed_loop", default=False, tooltip="Whether to close the context window loop; only applicable to looped schedules."),
+                io.Boolean.Input("closed_loop", default=False, tooltip="Whether to close the context window loop; only applicable to looped schedules.", advanced=True),
                io.Combo.Input("fuse_method", options=comfy.context_windows.ContextFuseMethods.LIST_STATIC, default=comfy.context_windows.ContextFuseMethods.PYRAMID, tooltip="The method to use to fuse the context windows."),
-                io.Boolean.Input("freenoise", default=False, tooltip="Whether to apply FreeNoise noise shuffling, improves window blending."),
-                #io.String.Input("cond_retain_index_list", default="", tooltip="List of latent indices to retain in the conditioning tensors for each window, for example setting this to '0' will use the initial start image for each window."),
-                #io.Boolean.Input("split_conds_to_windows", default=False, tooltip="Whether to split multiple conditionings (created by ConditionCombine) to each window based on region index."),
+                io.Boolean.Input("freenoise", default=True, tooltip="Whether to apply FreeNoise noise shuffling, improves window blending.", advanced=True),
+                io.Boolean.Input("retain_first_frame", default=False, tooltip="Retain the first I2V frame in every context window (may help retain initial reference)."),
+                io.Boolean.Input("split_conds_to_windows", default=False, tooltip="Whether to split multiple conditionings (created by ConditionCombine) to each window based on region index.", advanced=True),
        ]
        return schema

    @classmethod
    def execute(cls, model: io.Model.Type, context_length: int, context_overlap: int, context_schedule: str, context_stride: int, closed_loop: bool, fuse_method: str, freenoise: bool,
-                cond_retain_index_list: list[int]=[], split_conds_to_windows: bool=False) -> io.Model:
-        context_length = max(((context_length - 1) // 4) + 1, 1)  # at least length 1
-        context_overlap = max(((context_overlap - 1) // 4) + 1, 0)  # at least overlap 0
-        return super().execute(model, context_length, context_overlap, context_schedule, context_stride, closed_loop, fuse_method, dim=2, freenoise=freenoise, cond_retain_index_list=cond_retain_index_list, split_conds_to_windows=split_conds_to_windows)
+                retain_first_frame: bool=False, split_conds_to_windows: bool=False) -> io.Model:
+        """Rescale the frame-based inputs and delegate to the parent execute() with dim=2.
+
+        context_length is mapped to ((context_length - 1) // 4) + 1 (minimum 1) and
+        context_overlap to context_overlap // 4 (minimum 0) before being forwarded.
+        retain_first_frame=True forwards "0" as cond_retain_index_list; otherwise an
+        empty string is forwarded. All other arguments are passed through unchanged.
+        """
+        # NOTE(review): the divisor 4 presumably reflects 4 real frames per latent frame
+        # (the schema tooltip says "Must be 4*n + 1") - confirm against the model.
+        context_length = max(((context_length - 1) // 4) + 1, 1) # at least length 1
+        context_overlap = max(context_overlap // 4, 0)  # at least overlap 0
+        # The parent receives the retain list as a string: "0" selects index 0, "" selects nothing.
+        retain_index_list = "0" if retain_first_frame else ""
+        return super().execute(model, context_length, context_overlap, context_schedule, context_stride, closed_loop, fuse_method, dim=2, freenoise=freenoise, cond_retain_index_list=retain_index_list, split_conds_to_windows=split_conds_to_windows)
+
+
+class LTXVContextWindowsNode(ContextWindowsManualNode):
+    """Context-windows node variant for LTXV-like models.
+
+    Subclasses ContextWindowsManualNode, replaces its input list with one whose
+    length/overlap are given in real frames, and rescales those values before
+    delegating to the parent execute() with dim=2.
+    """
+
+    @classmethod
+    def define_schema(cls) -> io.Schema:
+        """Return the parent schema with the node id, display name, description and inputs overridden."""
+        schema = super().define_schema()
+        schema.node_id = "LTXVContextWindows"
+        schema.display_name = "LTXV Context Windows"
+        schema.description = "Set context windows for LTXV-like models."
+        # The input list is replaced wholesale; no "dim" input is exposed because
+        # execute() always forwards dim=2 to the parent.
+        schema.inputs = [
+            io.Model.Input("model", tooltip="The model to apply context windows to during sampling."),
+            io.Int.Input("context_length", min=1, max=nodes.MAX_RESOLUTION, step=8, default=145, tooltip="The length of the context window in real frames. Must be 8*n + 1."),
+            io.Int.Input("context_overlap", min=0, step=8, default=40, tooltip="The overlap of the context window in real frames."),
+            io.Combo.Input("context_schedule", options=[
+                comfy.context_windows.ContextSchedules.STATIC_STANDARD,
+                comfy.context_windows.ContextSchedules.UNIFORM_STANDARD,
+                comfy.context_windows.ContextSchedules.UNIFORM_LOOPED,
+                comfy.context_windows.ContextSchedules.BATCHED,
+                ], default=comfy.context_windows.ContextSchedules.UNIFORM_STANDARD, tooltip="Step-dependent scheduling algorithm for context windows."),
+            io.Int.Input("context_stride", min=1, default=1, tooltip="The stride of the context window; only applicable to uniform schedules.", advanced=True),
+            io.Boolean.Input("closed_loop", default=False, tooltip="Whether to close the context window loop; only applicable to looped schedules.", advanced=True),
+            io.Combo.Input("fuse_method", options=comfy.context_windows.ContextFuseMethods.LIST_STATIC, default=comfy.context_windows.ContextFuseMethods.PYRAMID, tooltip="The method to use to fuse the context windows."),
+            io.Boolean.Input("freenoise", default=True, tooltip="Whether to apply FreeNoise noise shuffling, improves window blending.", advanced=True),
+            io.Boolean.Input("retain_first_frame", default=False, tooltip="Retain the first latent frame in every context window (may help retain initial reference)."),
+            io.Boolean.Input("split_conds_to_windows", default=False, tooltip="Whether to split multiple conditionings (created by ConditionCombine) to each window based on region index.", advanced=True),
+        ]
+        return schema
+
+    @classmethod
+    def execute(cls, model: io.Model.Type, context_length: int, context_overlap: int, context_schedule: str, fuse_method: str, freenoise: bool,
+                retain_first_frame: bool=False, split_conds_to_windows: bool=False, context_stride: int=1, closed_loop: bool=False) -> io.Model:
+        """Rescale the frame-based inputs and delegate to the parent execute() with dim=2.
+
+        context_length is mapped to ((context_length - 1) // 8) + 1 (minimum 1) and
+        context_overlap to context_overlap // 8 (minimum 0) before being forwarded.
+        retain_first_frame=True forwards "0" as both cond_retain_index_list and
+        latent_retain_index_list; otherwise an empty string is forwarded for both.
+        """
+        # NOTE(review): the divisor 8 presumably reflects 8 real frames per latent frame
+        # (the schema tooltip says "Must be 8*n + 1") - confirm against the model.
+        context_length = max(((context_length - 1) // 8) + 1, 1)  # at least length 1
+        context_overlap = max(context_overlap // 8, 0)  # at least overlap 0
+        # The same index string is used for conditioning retention and for latent retention.
+        retain_index_list = "0" if retain_first_frame else ""
+        return super().execute(model, context_length, context_overlap, context_schedule, context_stride, closed_loop, fuse_method, dim=2, freenoise=freenoise,
+                               cond_retain_index_list=retain_index_list, latent_retain_index_list=retain_index_list, split_conds_to_windows=split_conds_to_windows)


 class ContextWindowsExtension(ComfyExtension):
@ -97,6 +136,7 @@ class ContextWindowsExtension(ComfyExtension):
        return [
            ContextWindowsManualNode,
            WanContextWindowsManualNode,
+            LTXVContextWindowsNode,
        ]

 def comfy_entrypoint():
--- a/comfy_extras/nodes_lt.py
+++ b/comfy_extras/nodes_lt.py
@ -136,7 +136,7 @@ class LTXVImgToVideoInplace(io.ComfyNode):
    generate = execute  # TODO: remove


-def _append_guide_attention_entry(positive, negative, pre_filter_count, latent_shape, strength=1.0):
+def _append_guide_attention_entry(positive, negative, pre_filter_count, latent_shape, strength=1.0, latent_start=0):
    """Append a guide_attention_entry to both positive and negative conditioning.

    Each entry tracks one guide reference for per-reference attention control.
@ -147,6 +147,7 @@ def _append_guide_attention_entry(positive, negative, pre_filter_count, latent_s
        "strength": strength,
        "pixel_mask": None,
        "latent_shape": latent_shape,
+        "latent_start": latent_start,
    }
    results = []
    for cond in (positive, negative):
@ -363,6 +364,7 @@ class LTXVAddGuide(io.ComfyNode):
        guide_latent_shape = list(t.shape[2:])  # [F, H, W]
        positive, negative = _append_guide_attention_entry(
            positive, negative, pre_filter_count, guide_latent_shape, strength=strength,
+            latent_start=latent_idx,
        )

        return io.NodeOutput(positive, negative, {"samples": latent_image, "noise_mask": noise_mask})
--- a/comfy_extras/nodes_sdpose.py
+++ b/comfy_extras/nodes_sdpose.py
@ -459,27 +459,23 @@ class SDPoseKeypointExtractor(io.ComfyNode):
        total_images = image.shape[0]
        captured_feat = None

-        model_h = int(head.heatmap_size[0]) * 4   # e.g. 192 * 4 = 768
-        model_w = int(head.heatmap_size[1]) * 4   # e.g. 256 * 4 = 1024
+        model_w = int(head.heatmap_size[0]) * 4   # 192 * 4 = 768
+        model_h = int(head.heatmap_size[1]) * 4   # 256 * 4 = 1024

        def _resize_to_model(imgs):
-            """Aspect-preserving resize + zero-pad BHWC images to (model_h, model_w). Returns (resized_bhwc, scale, pad_top, pad_left)."""
+            """Stretch BHWC images to (model_h, model_w), model expects no aspect preservation."""
            h, w = imgs.shape[-3], imgs.shape[-2]
-            scale = min(model_h / h, model_w / w)
-            sh, sw = int(round(h * scale)), int(round(w * scale))
-            pt, pl = (model_h - sh) // 2, (model_w - sw) // 2
+            method = "area" if (model_h <= h and model_w <= w) else "bilinear"
            chw = imgs.permute(0, 3, 1, 2).float()
-            scaled = comfy.utils.common_upscale(chw, sw, sh, upscale_method="bilinear", crop="disabled")
-            padded = torch.zeros(scaled.shape[0], scaled.shape[1], model_h, model_w, dtype=scaled.dtype, device=scaled.device)
-            padded[:, :, pt:pt + sh, pl:pl + sw] = scaled
-            return padded.permute(0, 2, 3, 1), scale, pt, pl
+            scaled = comfy.utils.common_upscale(chw, model_w, model_h, upscale_method=method, crop="disabled")
+            return scaled.permute(0, 2, 3, 1), model_w / w, model_h / h

-        def _remap_keypoints(kp, scale, pad_top, pad_left, offset_x=0, offset_y=0):
+        def _remap_keypoints(kp, scale_x, scale_y, offset_x=0, offset_y=0):
            """Remap keypoints from model space back to original image space."""
            kp = kp.copy() if isinstance(kp, np.ndarray) else np.array(kp, dtype=np.float32)
            invalid = kp[..., 0] < 0
-            kp[..., 0] = (kp[..., 0] - pad_left) / scale + offset_x
-            kp[..., 1] = (kp[..., 1] - pad_top)  / scale + offset_y
+            kp[..., 0] = kp[..., 0] / scale_x + offset_x
+            kp[..., 1] = kp[..., 1] / scale_y + offset_y
            kp[invalid] = -1
            return kp

@ -529,18 +525,18 @@ class SDPoseKeypointExtractor(io.ComfyNode):
                            continue

                        crop = img[:, y1:y2, x1:x2, :]  # (1, crop_h, crop_w, C)
-                        crop_resized, scale, pad_top, pad_left = _resize_to_model(crop)
+                        crop_resized, sx, sy = _resize_to_model(crop)

                        latent_crop = vae.encode(crop_resized)
                        kp_batch, sc_batch = _run_on_latent(latent_crop)
-                        kp = _remap_keypoints(kp_batch[0], scale, pad_top, pad_left, x1, y1)
+                        kp = _remap_keypoints(kp_batch[0], sx, sy, x1, y1)
                        img_keypoints.append(kp)
                        img_scores.append(sc_batch[0])
                else:
-                    img_resized, scale, pad_top, pad_left = _resize_to_model(img)
+                    img_resized, sx, sy = _resize_to_model(img)
                    latent_img = vae.encode(img_resized)
                    kp_batch, sc_batch = _run_on_latent(latent_img)
-                    img_keypoints.append(_remap_keypoints(kp_batch[0], scale, pad_top, pad_left))
+                    img_keypoints.append(_remap_keypoints(kp_batch[0], sx, sy))
                    img_scores.append(sc_batch[0])

                all_keypoints.append(img_keypoints)
@ -549,12 +545,12 @@ class SDPoseKeypointExtractor(io.ComfyNode):

        else: # full-image mode, batched
            for batch_start in tqdm(range(0, total_images, batch_size), desc="Extracting keypoints"):
-                batch_resized, scale, pad_top, pad_left = _resize_to_model(image[batch_start:batch_start + batch_size])
+                batch_resized, sx, sy = _resize_to_model(image[batch_start:batch_start + batch_size])
                latent_batch = vae.encode(batch_resized)
                kp_batch, sc_batch = _run_on_latent(latent_batch)

                for kp, sc in zip(kp_batch, sc_batch):
-                    all_keypoints.append([_remap_keypoints(kp, scale, pad_top, pad_left)])
+                    all_keypoints.append([_remap_keypoints(kp, sx, sy)])
                    all_scores.append([sc])

                pbar.update(len(kp_batch))
@ -727,13 +723,13 @@ class CropByBBoxes(io.ComfyNode):
                scale = min(output_width / crop_w, output_height / crop_h)
                scaled_w = int(round(crop_w * scale))
                scaled_h = int(round(crop_h * scale))
-                scaled = comfy.utils.common_upscale(crop_chw, scaled_w, scaled_h, upscale_method="bilinear", crop="disabled")
+                scaled = comfy.utils.common_upscale(crop_chw, scaled_w, scaled_h, upscale_method="area", crop="disabled")
                pad_left = (output_width  - scaled_w) // 2
                pad_top  = (output_height - scaled_h) // 2
                resized = torch.zeros(1, num_ch, output_height, output_width, dtype=image.dtype, device=image.device)
                resized[:, :, pad_top:pad_top + scaled_h, pad_left:pad_left + scaled_w] = scaled
            else:  # "stretch"
-                resized = comfy.utils.common_upscale(crop_chw, output_width, output_height, upscale_method="bilinear", crop="disabled")
+                resized = comfy.utils.common_upscale(crop_chw, output_width, output_height, upscale_method="area", crop="disabled")
            crops.append(resized)

        if not crops:
--- a/requirements.txt
+++ b/requirements.txt
@ -1,5 +1,5 @@
 comfyui-frontend-package==1.42.15
-comfyui-workflow-templates==0.9.65
+comfyui-workflow-templates==0.9.66
 comfyui-embedded-docs==0.4.4
 torch
 torchsde
Author	SHA1	Message	Date
drozbay	4816e39370	Merge `1226e301ad` into `3e3ed8cc2a`	2026-05-02 09:08:15 -07:00
comfyanonymous	3e3ed8cc2a	Add script in AMD portable to launch with dynamic vram. (#13667 ) Some checks are pending Python Linting / Run Ruff (push) Waiting to run Details Python Linting / Run Pylint (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Waiting to run Details Execution Tests / test (macos-latest) (push) Waiting to run Details Execution Tests / test (ubuntu-latest) (push) Waiting to run Details Execution Tests / test (windows-latest) (push) Waiting to run Details Test server launches without errors / test (push) Waiting to run Details Unit Tests / test (macos-latest) (push) Waiting to run Details Unit Tests / test (ubuntu-latest) (push) Waiting to run Details Unit Tests / test (windows-2022) (push) Waiting to run Details	2026-05-01 20:19:46 -04:00
comfyanonymous	67f6cb3527	List all the portable downloads in the README section. (#13666 )	2026-05-01 20:19:32 -04:00
Alexis Rolland	0230e0e7cc	Adding kijai (#13664 ) Co-authored-by: Jedrzej Kosinski <kosinkadink1@gmail.com>	2026-05-02 06:37:18 +08:00
Jukka Seppänen	b5921c8ac2	SDPose: resize fix (#13656 )	2026-05-01 14:17:25 -07:00
Simon Lui	63103d519e	Remove IPEX and clean up checks and add missing synchronize during empty cache. (#13653 )	2026-05-01 14:16:41 -07:00
Alexander Piskun	cf758bd256	chore(api-nodes): increase default timeout for partner API node tasks (#13663 ) Some checks failed Python Linting / Run Ruff (push) Waiting to run Details Python Linting / Run Pylint (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Waiting to run Details Execution Tests / test (macos-latest) (push) Waiting to run Details Execution Tests / test (ubuntu-latest) (push) Waiting to run Details Execution Tests / test (windows-latest) (push) Waiting to run Details Test server launches without errors / test (push) Waiting to run Details Unit Tests / test (macos-latest) (push) Waiting to run Details Unit Tests / test (ubuntu-latest) (push) Waiting to run Details Unit Tests / test (windows-2022) (push) Waiting to run Details Build package / Build Test (3.10) (push) Has been cancelled Details Build package / Build Test (3.11) (push) Has been cancelled Details Build package / Build Test (3.12) (push) Has been cancelled Details Build package / Build Test (3.13) (push) Has been cancelled Details Build package / Build Test (3.14) (push) Has been cancelled Details Signed-off-by: bigcat88 <bigcat88@icloud.com> Co-authored-by: Jedrzej Kosinski <kosinkadink1@gmail.com>	2026-05-01 12:48:41 -07:00
Daxiong (Lin)	10b45a71cd	chore: update workflow templates to v0.9.66 (#13662 ) Co-authored-by: Jedrzej Kosinski <kosinkadink1@gmail.com>	2026-05-01 12:11:30 -07:00
ozbayb	1226e301ad	Clean up model-specific context windows nodes	2026-04-30 16:22:12 -06:00
drozbay	f959e6bfa1	Merge branch 'master' into 20260322a_ltx_contextwin	2026-04-26 17:01:18 -06:00
ozbayb	e949250876	Add LTXVContextWindows node	2026-04-24 12:18:22 -06:00
ozbayb	4e434bccaa	Create separate latent_retain_index_list to ensure that inplace latent retention doesn't occur on all models when unintended.	2026-04-24 09:47:49 -06:00
ozbayb	6442392810	Add defensive dtype cast before sigma step check	2026-04-13 15:19:25 -06:00
ozbayb	6a53695006	LTX2 context windows - Skip guide frames in freenoise shuffle	2026-04-12 20:18:10 -06:00
ozbayb	a8b084ed58	LTX2 context windows - Thread per-modality overlap into fuse weights	2026-04-12 18:42:45 -06:00
ozbayb	f72583d1f3	LTX2 context windows - Move symmetric_patchifier import to module level	2026-04-12 15:46:20 -06:00
ozbayb	d59d6fb7a0	LTX2 context windows - Skip VRAM estimate clamp for packed latents	2026-04-12 15:43:36 -06:00
ozbayb	b348c7fa61	Merge branch '20260322a_ltx_contextwin' of https://github.com/drozbay/ComfyUI into 20260322a_ltx_contextwin	2026-04-12 14:54:00 -06:00
ozbayb	f1f3182be1	LTX2 context windows - Fix audio index mapping for wrapped/strided primary windows. The previous window-level calculation collapsed wrapped or strided primary windows into a contiguous audio tail, so audio attended to a different temporal region than the video. Replace with per-frame mapping that computes each primary index's audio span independently and concatenates in order.	2026-04-12 14:52:54 -06:00
drozbay	32e9e4a694	Merge branch 'master' into 20260322a_ltx_contextwin	2026-04-12 12:22:31 -06:00
ozbayb	ae3830a6d2	LTX2 Context Windows - Collect multimodal methods into WindowingState; Condense execution path to treat all latents as potentially multimodal	2026-04-11 11:31:04 -06:00
ozbayb	88643f3978	Fix logging of guide frame number	2026-04-07 14:11:19 -06:00
ozbayb	d1a9e2e4df	Fix whitespace	2026-04-07 13:25:31 -06:00
ozbayb	d5badc5f38	LTX2 context windows - Clean up unnecessary code	2026-04-07 13:00:38 -06:00
ozbayb	c9edd2d7c0	LTX2 context windows - Add handling for downscaled IC-Lora guide frames	2026-04-07 12:44:28 -06:00
ozbayb	f1acd5bd85	LTX2 context windows - Cleanup: Simplify window data handling, improve variable names, refactor and condense new context window methods to separate execution paths cleanly	2026-04-07 12:43:41 -06:00
ozbayb	3a061f4bbf	LTX2 context windows - Cleanup: Simplify IndexListContextHandler standard execute path	2026-04-06 15:13:46 -06:00
ozbayb	874690c01c	LTX2 context windows - Refactor guide logic from context_windows into LTXAV model hooks	2026-04-06 11:44:14 -06:00
ozbayb	350237618d	LTX2 context windows - Cleanup: Remove model specific code from BaseModel. Older LTXV model's guides + context_windows will need to be re-implemented but outside the scope of LTX2 changes	2026-04-06 10:10:19 -06:00
ozbayb	3660533f83	LTX2 context windows - Cleanup: latent_start value is required for context windows with guides	2026-04-06 10:10:19 -06:00
ozbayb	71712472f5	LTX2 context windows - Ensure that inplace latent images are retained properly with the retain index list	2026-04-06 10:10:19 -06:00
ozbayb	9566c18ced	LTX2 context windows - Fix crash when a window doesn't have a guide index	2026-04-06 10:10:19 -06:00
ozbayb	ef61ddfaed	Fix freenoise application for LTXAV context windows, fix audio mapping to context windows	2026-04-06 10:10:19 -06:00
ozbayb	115dbb69d1	LTX2 context windows part 3 - Generalize guide splitting to windows	2026-04-06 10:10:19 -06:00
ozbayb	941d50e777	LTX2 context windows part 2b - Calculate guide parameters in model code, refactor	2026-04-06 10:10:19 -06:00
ozbayb	56de390c25	LTX2 context windows part 2 - Guide aware processing	2026-04-06 10:10:19 -06:00
ozbayb	5bfe660b7c	Test implementation for LTX2 context windows	2026-04-06 10:10:19 -06:00