Merge commit 'a4787ac83bf6c83eeb459ed80fc9b36f63d2a3a7' of github.com:comfyanonymous/ComfyUI into fix-merge

This commit is contained in:
doctorpangloss 2025-10-21 10:53:43 -07:00
commit be56a14e65
120 changed files with 9370 additions and 6547 deletions

View File

@ -0,0 +1,27 @@
As of the time of writing, you need this preview driver for best results:
https://www.amd.com/en/resources/support-articles/release-notes/RN-AMDGPU-WINDOWS-PYTORCH-PREVIEW.html
HOW TO RUN:
If you have an AMD GPU:
run_amd_gpu.bat
If you have memory issues, you can try disabling smart memory management by running ComfyUI with:
run_amd_gpu_disable_smart_memory.bat
IF YOU GET A RED ERROR IN THE UI MAKE SURE YOU HAVE A MODEL/CHECKPOINT IN: ComfyUI\models\checkpoints
You can download the Stable Diffusion XL base checkpoint from: https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/sd_xl_base_1.0_0.9vae.safetensors
RECOMMENDED WAY TO UPDATE:
To update the ComfyUI code: update\update_comfyui.bat
TO SHARE MODELS BETWEEN COMFYUI AND ANOTHER UI:
In the ComfyUI directory you will find a file: extra_model_paths.yaml.example
Rename this file to: extra_model_paths.yaml and edit it with your favorite text editor.

View File

@ -0,0 +1,2 @@
.\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build --disable-smart-memory
pause

View File

@ -0,0 +1,2 @@
.\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build
pause

View File

@ -0,0 +1,61 @@
name: "Release Stable All Portable Versions"
on:
workflow_dispatch:
inputs:
git_tag:
description: 'Git tag'
required: true
type: string
jobs:
release_nvidia_default:
permissions:
contents: "write"
packages: "write"
pull-requests: "read"
name: "Release NVIDIA Default (cu129)"
uses: ./.github/workflows/stable-release.yml
with:
git_tag: ${{ inputs.git_tag }}
cache_tag: "cu129"
python_minor: "13"
python_patch: "6"
rel_name: "nvidia"
rel_extra_name: ""
test_release: true
secrets: inherit
release_nvidia_cu128:
permissions:
contents: "write"
packages: "write"
pull-requests: "read"
name: "Release NVIDIA cu128"
uses: ./.github/workflows/stable-release.yml
with:
git_tag: ${{ inputs.git_tag }}
cache_tag: "cu128"
python_minor: "12"
python_patch: "10"
rel_name: "nvidia"
rel_extra_name: "_cu128"
test_release: true
secrets: inherit
release_amd_rocm:
permissions:
contents: "write"
packages: "write"
pull-requests: "read"
name: "Release AMD ROCm 6.4.4"
uses: ./.github/workflows/stable-release.yml
with:
git_tag: ${{ inputs.git_tag }}
cache_tag: "rocm644"
python_minor: "12"
python_patch: "10"
rel_name: "amd"
rel_extra_name: ""
test_release: false
secrets: inherit

View File

@ -0,0 +1,64 @@
name: "Windows Release dependencies Manual"
on:
workflow_dispatch:
inputs:
torch_dependencies:
description: 'torch dependencies'
required: false
type: string
default: "torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu128"
cache_tag:
description: 'Cached dependencies tag'
required: true
type: string
default: "cu128"
python_minor:
description: 'python minor version'
required: true
type: string
default: "12"
python_patch:
description: 'python patch version'
required: true
type: string
default: "10"
jobs:
build_dependencies:
runs-on: windows-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: 3.${{ inputs.python_minor }}.${{ inputs.python_patch }}
- shell: bash
run: |
echo "@echo off
call update_comfyui.bat nopause
echo -
echo This will try to update pytorch and all python dependencies.
echo -
echo If you just want to update normally, close this and run update_comfyui.bat instead.
echo -
pause
..\python_embeded\python.exe -s -m pip install --upgrade ${{ inputs.torch_dependencies }} -r ../ComfyUI/requirements.txt pygit2
pause" > update_comfyui_and_python_dependencies.bat
grep -v comfyui requirements.txt > requirements_nocomfyui.txt
python -m pip wheel --no-cache-dir ${{ inputs.torch_dependencies }} -r requirements_nocomfyui.txt pygit2 -w ./temp_wheel_dir
python -m pip install --no-cache-dir ./temp_wheel_dir/*
echo installed basic
ls -lah temp_wheel_dir
mv temp_wheel_dir ${{ inputs.cache_tag }}_python_deps
tar cf ${{ inputs.cache_tag }}_python_deps.tar ${{ inputs.cache_tag }}_python_deps
- uses: actions/cache/save@v4
with:
path: |
${{ inputs.cache_tag }}_python_deps.tar
update_comfyui_and_python_dependencies.bat
key: ${{ runner.os }}-build-${{ inputs.cache_tag }}-${{ inputs.python_minor }}

View File

@ -1,25 +1,6 @@
# Admins
* @doctorpangloss
# Note: GitHub teams syntax cannot be used here because the repo is not owned by Comfy-Org.
# Inlined the team members for now.
# Maintainers
*.md @yoland68 @robinjhuang @webfiltered @pythongosssss @ltdrdata @Kosinkadink @christian-byrne @guill
/tests/ @yoland68 @robinjhuang @webfiltered @pythongosssss @ltdrdata @Kosinkadink @christian-byrne @guill
/tests-unit/ @yoland68 @robinjhuang @webfiltered @pythongosssss @ltdrdata @Kosinkadink @christian-byrne @guill
/notebooks/ @yoland68 @robinjhuang @webfiltered @pythongosssss @ltdrdata @Kosinkadink @christian-byrne @guill
/script_examples/ @yoland68 @robinjhuang @webfiltered @pythongosssss @ltdrdata @Kosinkadink @christian-byrne @guill
/.github/ @yoland68 @robinjhuang @webfiltered @pythongosssss @ltdrdata @Kosinkadink @christian-byrne @guill
/requirements.txt @yoland68 @robinjhuang @webfiltered @pythongosssss @ltdrdata @Kosinkadink @christian-byrne @guill
/pyproject.toml @yoland68 @robinjhuang @webfiltered @pythongosssss @ltdrdata @Kosinkadink @christian-byrne @guill
# Python web server
/api_server/ @yoland68 @robinjhuang @webfiltered @pythongosssss @ltdrdata @christian-byrne @guill
/app/ @yoland68 @robinjhuang @webfiltered @pythongosssss @ltdrdata @christian-byrne @guill
/utils/ @yoland68 @robinjhuang @webfiltered @pythongosssss @ltdrdata @christian-byrne @guill
# Node developers
/comfy_extras/ @yoland68 @robinjhuang @pythongosssss @ltdrdata @Kosinkadink @webfiltered @christian-byrne @guill
/comfy/comfy_types/ @yoland68 @robinjhuang @pythongosssss @ltdrdata @Kosinkadink @webfiltered @christian-byrne @guill
/comfy_api_nodes/ @yoland68 @robinjhuang @pythongosssss @ltdrdata @Kosinkadink @webfiltered @christian-byrne @guill
# From upstream
* @comfyanonymous
* @kosinkadink
# For the fork
* @doctorpangloss

View File

@ -1,6 +1,6 @@
# This file is automatically generated by the build process when version is
# updated in pyproject.toml.
__version__ = "0.3.60"
__version__ = "0.3.65"
# This deals with workspace issues
from comfy_compatibility.workspace import auto_patch_workspace_and_restart

View File

@ -147,6 +147,20 @@ class FrontendManager:
except Exception as exc_info:
return "1.23.4"
@classmethod
def get_installed_templates_version(cls) -> str:
"""Get the currently installed workflow templates package version."""
try:
templates_version_str = version("comfyui-workflow-templates")
return templates_version_str
except Exception:
return None
@classmethod
def get_required_templates_version(cls) -> str:
# returns a stub, since this isn't a helpful check in this environment
return "0.0.1"
@classmethod
def default_frontend_path(cls) -> str:
try:
@ -154,7 +168,7 @@ class FrontendManager:
return str(importlib.resources.files(comfyui_frontend_package) / "static")
except ImportError:
logger.error(f"""comfyui-frontend-package is not installed.""".strip())
logger.error(f"comfyui-frontend-package is not installed.")
return ""
@classmethod
@ -166,15 +180,9 @@ class FrontendManager:
importlib.resources.files(comfyui_workflow_templates) / "templates"
)
except ImportError:
logger.error(
f"""
********** ERROR ***********
comfyui-workflow-templates is not installed.
********** ERROR ***********
""".strip()
)
# we're not blind
logger.error("comfyui-workflow-templates is not installed.")
return ""
@classmethod
def embedded_docs_path(cls) -> str:

View File

@ -2,7 +2,7 @@ import importlib.util
import os
import subprocess
from ..cli_args import args
from ..cli_args import args, PerformanceFeature
# Can't use pytorch to get the GPU names because the cuda malloc has to be set before the first import.
@ -84,7 +84,8 @@ if not args.cuda_malloc:
version = module.__version__
if int(version[0]) >= 2 and "+cu" in version: # enable by default for torch version 2.0 and up only on cuda torch
args.cuda_malloc = cuda_malloc_supported()
if PerformanceFeature.AutoTune not in args.fast: # Autotune has issues with cuda malloc
args.cuda_malloc = cuda_malloc_supported()
except:
pass

View File

@ -23,7 +23,7 @@ from opentelemetry.trace import get_current_span, StatusCode, Status
from .main_pre import tracer
from comfy_execution.caching import HierarchicalCache, LRUCache, CacheKeySetInputSignature, CacheKeySetID, \
DependencyAwareCache, \
NullCache, \
BasicCache
from comfy_execution.graph import get_input_info, ExecutionList, DynamicPrompt, ExecutionBlocker
from comfy_execution.graph_utils import is_link, GraphBuilder
@ -105,13 +105,13 @@ class IsChangedCache:
class CacheType(Enum):
CLASSIC = 0
LRU = 1
DEPENDENCY_AWARE = 2
NONE = 2
class CacheSet:
def __init__(self, cache_type=None, cache_size=None):
if cache_type == CacheType.DEPENDENCY_AWARE:
self.init_dependency_aware_cache()
if cache_type == CacheType.NONE:
self.init_null_cache()
logger.info("Disabling intermediate node cache.")
elif cache_type == CacheType.LRU:
if cache_size is None:
@ -134,11 +134,12 @@ class CacheSet:
self.ui = LRUCache(CacheKeySetInputSignature, max_size=cache_size)
self.objects = HierarchicalCache(CacheKeySetID)
# only hold cached items while the descendants have not executed
def init_dependency_aware_cache(self):
self.outputs = DependencyAwareCache(CacheKeySetInputSignature)
self.ui = DependencyAwareCache(CacheKeySetInputSignature)
self.objects = DependencyAwareCache(CacheKeySetID)
def init_null_cache(self):
self.outputs = NullCache()
# The UI cache is expected to be iterable at the end of each workflow,
# so it must cache at least a full workflow. Use HierarchicalCache.
self.ui = HierarchicalCache(CacheKeySetInputSignature)
self.objects = NullCache()
def recursive_debug_dump(self):
result = {
@ -151,7 +152,7 @@ class CacheSet:
SENSITIVE_EXTRA_DATA_KEYS = ("auth_token_comfy_org", "api_key_comfy_org")
def get_input_data(inputs, class_def, unique_id, outputs=None, dynprompt=None, extra_data=None):
def get_input_data(inputs, class_def, unique_id, execution_list=None, dynprompt=None, extra_data=None):
if extra_data is None:
extra_data = {}
if outputs is None:
@ -175,10 +176,10 @@ def get_input_data(inputs, class_def, unique_id, outputs=None, dynprompt=None, e
if is_link(input_data) and (not input_info or not input_info.get("rawLink", False)):
input_unique_id = input_data[0]
output_index = input_data[1]
if outputs is None:
if execution_list is None:
mark_missing()
continue # This might be a lazily-evaluated input
cached_output = outputs.get(input_unique_id)
cached_output = execution_list.get_output_cache(input_unique_id, unique_id)
if cached_output is None:
mark_missing()
continue
@ -486,6 +487,7 @@ async def _execute(server, dynprompt, caches: CacheSet, current_item: str, extra
cached_output = caches.ui.get(unique_id) or {}
server.send_sync("executed", {"node": unique_id, "display_node": display_node_id, "output": cached_output.get("output", None), "prompt_id": prompt_id}, server.client_id)
get_progress_state().finish_progress(unique_id)
execution_list.cache_update(unique_id, caches.outputs.get(unique_id))
return RecursiveExecutionTuple(ExecutionResult.SUCCESS, None, None)
input_data_all = None
@ -515,7 +517,7 @@ async def _execute(server, dynprompt, caches: CacheSet, current_item: str, extra
for r in result:
if is_link(r):
source_node, source_output = r[0], r[1]
node_output = caches.outputs.get(source_node)[source_output]
node_output = execution_list.get_output_cache(source_node, unique_id)[source_output]
for o in node_output:
resolved_output.append(o)
@ -527,7 +529,7 @@ async def _execute(server, dynprompt, caches: CacheSet, current_item: str, extra
has_subgraph = False
else:
get_progress_state().start_progress(unique_id)
input_data_all, missing_keys, hidden_inputs = get_input_data(inputs, class_def, unique_id, caches.outputs, dynprompt, extra_data)
input_data_all, missing_keys, hidden_inputs = get_input_data(inputs, class_def, unique_id, execution_list, dynprompt, extra_data)
if server.client_id is not None:
server.last_node_id = display_node_id
server.send_sync("executing", {"node": unique_id, "display_node": display_node_id, "prompt_id": prompt_id}, server.client_id)
@ -635,11 +637,14 @@ async def _execute(server, dynprompt, caches: CacheSet, current_item: str, extra
subcache.clean_unused()
for node_id in new_output_ids:
execution_list.add_node(node_id)
execution_list.cache_link(node_id, unique_id)
for link in new_output_links:
execution_list.add_strong_link(link[0], link[1], unique_id)
pending_subgraph_results[unique_id] = cached_outputs
return RecursiveExecutionTuple(ExecutionResult.PENDING, None, None)
caches.outputs.set(unique_id, output_data)
execution_list.cache_update(unique_id, output_data)
except interruption.InterruptProcessingException as iex:
logger.info("Processing interrupted")

View File

@ -17,6 +17,7 @@ import warnings
from .. import options
from ..app import logger
os.environ['TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL'] = '1'
os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1"
os.environ["TORCHINDUCTOR_FX_GRAPH_CACHE"] = "1"
os.environ["TORCHINDUCTOR_AUTOGRAD_CACHE"] = "1"
@ -63,6 +64,7 @@ if args.default_device is not None:
if args.cuda_device is not None:
os.environ['CUDA_VISIBLE_DEVICES'] = str(args.cuda_device)
os.environ['HIP_VISIBLE_DEVICES'] = str(args.cuda_device)
os.environ["ASCEND_RT_VISIBLE_DEVICES"] = str(args.cuda_device)
this_logger.info("Set cuda device to: {}".format(args.cuda_device))
if args.deterministic:

View File

@ -81,6 +81,28 @@ def get_comfyui_version():
return __version__
# Track deprecated paths that have been warned about to only warn once per file
_deprecated_paths_warned = set()
@web.middleware
async def deprecation_warning(request: web.Request, handler):
"""Middleware to warn about deprecated frontend API paths"""
path = request.path
if path.startswith("/scripts/ui") or path.startswith("/extensions/core/"):
# Only warn once per unique file path
if path not in _deprecated_paths_warned:
_deprecated_paths_warned.add(path)
logging.warning(
f"[DEPRECATION WARNING] Detected import of deprecated legacy API: {path}. "
f"This is likely caused by a custom node extension using outdated APIs. "
f"Please update your extensions or contact the extension author for an updated version."
)
response: web.Response = await handler(request)
return response
@web.middleware
async def compress_body(request: web.Request, handler):
accept_encoding = request.headers.get("Accept-Encoding", "")
@ -202,7 +224,7 @@ class PromptServer(ExecutorToClientProgress):
self._external_address: Optional[str] = None
self.background_tasks: dict[str, Task] = dict()
middlewares = [cache_control]
middlewares = [cache_control, deprecation_warning]
if args.enable_compress_response_body:
middlewares.append(compress_body)
@ -589,6 +611,8 @@ class PromptServer(ExecutorToClientProgress):
vram_total, torch_vram_total = get_total_memory(device, torch_total_too=True)
vram_free, torch_vram_free = get_free_memory(device, torch_free_too=True)
required_frontend_version = FrontendManager.get_required_frontend_version()
installed_templates_version = FrontendManager.get_installed_templates_version()
required_templates_version = FrontendManager.get_required_templates_version()
system_stats = {
"system": {
@ -597,6 +621,8 @@ class PromptServer(ExecutorToClientProgress):
"ram_free": ram_free,
"comfyui_version": __version__,
"required_frontend_version": required_frontend_version,
"installed_templates_version": installed_templates_version,
"required_templates_version": required_templates_version,
"python_version": sys.version,
"pytorch_version": torch_version,
"embedded_python": os.path.split(os.path.split(sys.executable)[0])[1] == "python_embeded",

View File

@ -27,8 +27,6 @@ class MusicDCAE(torch.nn.Module):
else:
self.source_sample_rate = source_sample_rate
# self.resampler = torchaudio.transforms.Resample(source_sample_rate, 44100)
self.transform = transforms.Compose([
transforms.Normalize(0.5, 0.5),
])
@ -41,10 +39,6 @@ class MusicDCAE(torch.nn.Module):
self.scale_factor = 0.1786
self.shift_factor = -1.9091
def load_audio(self, audio_path):
audio, sr = torchaudio.load(audio_path)
return audio, sr
def forward_mel(self, audios):
mels = []
for i in range(len(audios)):
@ -77,10 +71,8 @@ class MusicDCAE(torch.nn.Module):
latent = self.dcae.encoder(mel.unsqueeze(0))
latents.append(latent)
latents = torch.cat(latents, dim=0)
# latent_lengths = (audio_lengths / sr * 44100 / 512 / self.time_dimention_multiple).long()
latents = (latents - self.shift_factor) * self.scale_factor
return latents
# return latents, latent_lengths
@torch.no_grad()
def decode(self, latents, audio_lengths=None, sr=None):
@ -95,9 +87,7 @@ class MusicDCAE(torch.nn.Module):
wav = self.vocoder.decode(mels[0]).squeeze(1)
if sr is not None:
# resampler = torchaudio.transforms.Resample(44100, sr).to(latents.device).to(latents.dtype)
wav = torchaudio.functional.resample(wav, 44100, sr)
# wav = resampler(wav)
else:
sr = 44100
pred_wavs.append(wav)
@ -105,7 +95,6 @@ class MusicDCAE(torch.nn.Module):
if audio_lengths is not None:
pred_wavs = [wav[:, :length].cpu() for wav, length in zip(pred_wavs, audio_lengths)]
return torch.stack(pred_wavs)
# return sr, pred_wavs
def forward(self, audios, audio_lengths=None, sr=None):
latents, latent_lengths = self.encode(audios=audios, audio_lengths=audio_lengths, sr=sr)

View File

@ -186,15 +186,15 @@ class ChromaRadiance(Chroma):
nerf_pixels = nn.functional.unfold(img_orig, kernel_size=patch_size, stride=patch_size)
nerf_pixels = nerf_pixels.transpose(1, 2) # -> [B, NumPatches, C * P * P]
# Reshape for per-patch processing
nerf_hidden = img_out.reshape(B * num_patches, params.hidden_size)
nerf_pixels = nerf_pixels.reshape(B * num_patches, C, patch_size ** 2).transpose(1, 2)
if params.nerf_tile_size > 0 and num_patches > params.nerf_tile_size:
# Enable tiling if nerf_tile_size isn't 0 and we actually have more patches than
# the tile size.
img_dct = self.forward_tiled_nerf(img_out, nerf_pixels, B, C, num_patches, patch_size, params)
img_dct = self.forward_tiled_nerf(nerf_hidden, nerf_pixels, B, C, num_patches, patch_size, params)
else:
# Reshape for per-patch processing
nerf_hidden = img_out.reshape(B * num_patches, params.hidden_size)
nerf_pixels = nerf_pixels.reshape(B * num_patches, C, patch_size ** 2).transpose(1, 2)
# Get DCT-encoded pixel embeddings [pixel-dct]
img_dct = self.nerf_image_embedder(nerf_pixels)
@ -237,17 +237,8 @@ class ChromaRadiance(Chroma):
end = min(i + tile_size, num_patches)
# Slice the current tile from the input tensors
nerf_hidden_tile = nerf_hidden[:, i:end, :]
nerf_pixels_tile = nerf_pixels[:, i:end, :]
# Get the actual number of patches in this tile (can be smaller for the last tile)
num_patches_tile = nerf_hidden_tile.shape[1]
# Reshape the tile for per-patch processing
# [B, NumPatches_tile, D] -> [B * NumPatches_tile, D]
nerf_hidden_tile = nerf_hidden_tile.reshape(batch * num_patches_tile, params.hidden_size)
# [B, NumPatches_tile, C*P*P] -> [B*NumPatches_tile, C, P*P] -> [B*NumPatches_tile, P*P, C]
nerf_pixels_tile = nerf_pixels_tile.reshape(batch * num_patches_tile, channels, patch_size ** 2).transpose(1, 2)
nerf_hidden_tile = nerf_hidden[i * batch:end * batch]
nerf_pixels_tile = nerf_pixels[i * batch:end * batch]
# get DCT-encoded pixel embeddings [pixel-dct]
img_dct_tile = self.nerf_image_embedder(nerf_pixels_tile)
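As a sanity check on the tiling rewrite above: applying a per-row function to contiguous row slices of the flattened [B * NumPatches, ...] tensor and concatenating the results matches applying it to the whole tensor at once. torch.tanh stands in for the per-patch NeRF head here, purely for illustration:
import torch

B, num_patches, D, tile = 2, 7, 5, 3
x = torch.randn(B * num_patches, D)
full = torch.tanh(x)
tiles = [torch.tanh(x[i * B:min(i + tile, num_patches) * B])
         for i in range(0, num_patches, tile)]
assert torch.allclose(torch.cat(tiles, dim=0), full)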

View File

@ -37,7 +37,10 @@ def rope(pos: Tensor, dim: int, theta: int) -> Tensor:
def apply_rope1(x: Tensor, freqs_cis: Tensor):
x_ = x.to(dtype=freqs_cis.dtype).reshape(*x.shape[:-1], -1, 1, 2)
x_out = freqs_cis[..., 0] * x_[..., 0] + freqs_cis[..., 1] * x_[..., 1]
x_out = freqs_cis[..., 0] * x_[..., 0]
x_out.addcmul_(freqs_cis[..., 1], x_[..., 1])
return x_out.reshape(*x.shape).type_as(x)
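A small check of the rewrite above: the in-place addcmul_ form computes the same value as the original fused expression while avoiding a second full-size temporary for the product:
import torch

a, b, c, d = (torch.randn(4, 8) for _ in range(4))
reference = a * c + b * d
fused = a * c
fused.addcmul_(b, d)   # fused += b * d, element-wise, in place
assert torch.allclose(reference, fused)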
def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor):

View File

@ -1,7 +1,7 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from ..modules.diffusionmodules.model import ResnetBlock, AttnBlock, VideoConv3d
from ..modules.diffusionmodules.model import ResnetBlock, AttnBlock, VideoConv3d, Normalize
from ..models.autoencoder import DiagonalGaussianRegularizer
from ...ops import disable_weight_init as ops
@ -18,11 +18,12 @@ class RMS_norm(nn.Module):
class DnSmpl(nn.Module):
def __init__(self, ic, oc, tds=True):
def __init__(self, ic, oc, tds=True, refiner_vae=True, op=VideoConv3d):
super().__init__()
fct = 2 * 2 * 2 if tds else 1 * 2 * 2
assert oc % fct == 0
self.conv = VideoConv3d(ic, oc // fct, kernel_size=3)
self.conv = op(ic, oc // fct, kernel_size=3, stride=1, padding=1)
self.refiner_vae = refiner_vae
self.tds = tds
self.gs = fct * ic // oc
@ -31,7 +32,7 @@ class DnSmpl(nn.Module):
r1 = 2 if self.tds else 1
h = self.conv(x)
if self.tds:
if self.tds and self.refiner_vae:
hf = h[:, :, :1, :, :]
b, c, f, ht, wd = hf.shape
hf = hf.reshape(b, c, f, ht // 2, 2, wd // 2, 2)
@ -67,6 +68,7 @@ class DnSmpl(nn.Module):
sc = torch.cat([xf, xn], dim=2)
else:
b, c, frms, ht, wd = h.shape
nf = frms // r1
h = h.reshape(b, c, nf, r1, ht // 2, 2, wd // 2, 2)
h = h.permute(0, 3, 5, 7, 1, 2, 4, 6)
@ -84,10 +86,11 @@ class DnSmpl(nn.Module):
class UpSmpl(nn.Module):
def __init__(self, ic, oc, tus=True):
def __init__(self, ic, oc, tus=True, refiner_vae=True, op=VideoConv3d):
super().__init__()
fct = 2 * 2 * 2 if tus else 1 * 2 * 2
self.conv = VideoConv3d(ic, oc * fct, kernel_size=3)
self.conv = op(ic, oc * fct, kernel_size=3, stride=1, padding=1)
self.refiner_vae = refiner_vae
self.tus = tus
self.rp = fct * oc // ic
@ -96,7 +99,7 @@ class UpSmpl(nn.Module):
r1 = 2 if self.tus else 1
h = self.conv(x)
if self.tus:
if self.tus and self.refiner_vae:
hf = h[:, :, :1, :, :]
b, c, f, ht, wd = hf.shape
nc = c // (2 * 2)
@ -150,43 +153,56 @@ class UpSmpl(nn.Module):
class Encoder(nn.Module):
def __init__(self, in_channels, z_channels, block_out_channels, num_res_blocks,
ffactor_spatial, ffactor_temporal, downsample_match_channel=True, **_):
ffactor_spatial, ffactor_temporal, downsample_match_channel=True, refiner_vae=True, **_):
super().__init__()
self.z_channels = z_channels
self.block_out_channels = block_out_channels
self.num_res_blocks = num_res_blocks
self.conv_in = VideoConv3d(in_channels, block_out_channels[0], 3, 1, 1)
self.ffactor_temporal = ffactor_temporal
self.refiner_vae = refiner_vae
if self.refiner_vae:
conv_op = VideoConv3d
norm_op = RMS_norm
else:
conv_op = ops.Conv3d
norm_op = Normalize
self.conv_in = conv_op(in_channels, block_out_channels[0], 3, 1, 1)
self.down = nn.ModuleList()
ch = block_out_channels[0]
depth = (ffactor_spatial >> 1).bit_length()
depth_temporal = ((ffactor_spatial // ffactor_temporal) >> 1).bit_length()
depth_temporal = ((ffactor_spatial // self.ffactor_temporal) >> 1).bit_length()
for i, tgt in enumerate(block_out_channels):
stage = nn.Module()
stage.block = nn.ModuleList([ResnetBlock(in_channels=ch if j == 0 else tgt,
out_channels=tgt,
temb_channels=0,
conv_op=VideoConv3d, norm_op=RMS_norm)
conv_op=conv_op, norm_op=norm_op)
for j in range(num_res_blocks)])
ch = tgt
if i < depth:
nxt = block_out_channels[i + 1] if i + 1 < len(block_out_channels) and downsample_match_channel else ch
stage.downsample = DnSmpl(ch, nxt, tds=i >= depth_temporal)
stage.downsample = DnSmpl(ch, nxt, tds=i >= depth_temporal, refiner_vae=self.refiner_vae, op=conv_op)
ch = nxt
self.down.append(stage)
self.mid = nn.Module()
self.mid.block_1 = ResnetBlock(in_channels=ch, out_channels=ch, temb_channels=0, conv_op=VideoConv3d, norm_op=RMS_norm)
self.mid.attn_1 = AttnBlock(ch, conv_op=ops.Conv3d, norm_op=RMS_norm)
self.mid.block_2 = ResnetBlock(in_channels=ch, out_channels=ch, temb_channels=0, conv_op=VideoConv3d, norm_op=RMS_norm)
self.mid.block_1 = ResnetBlock(in_channels=ch, out_channels=ch, temb_channels=0, conv_op=conv_op, norm_op=norm_op)
self.mid.attn_1 = AttnBlock(ch, conv_op=ops.Conv3d, norm_op=norm_op)
self.mid.block_2 = ResnetBlock(in_channels=ch, out_channels=ch, temb_channels=0, conv_op=conv_op, norm_op=norm_op)
self.norm_out = RMS_norm(ch)
self.conv_out = VideoConv3d(ch, z_channels << 1, 3, 1, 1)
self.norm_out = norm_op(ch)
self.conv_out = conv_op(ch, z_channels << 1, 3, 1, 1)
self.regul = DiagonalGaussianRegularizer()
def forward(self, x):
if not self.refiner_vae and x.shape[2] == 1:
x = x.expand(-1, -1, self.ffactor_temporal, -1, -1)
x = self.conv_in(x)
for stage in self.down:
@ -202,32 +218,43 @@ class Encoder(nn.Module):
skip = x.view(b, c // grp, grp, t, h, w).mean(2)
out = self.conv_out(F.silu(self.norm_out(x))) + skip
out = self.regul(out)[0]
out = torch.cat((out[:, :, :1], out), dim=2)
out = out.permute(0, 2, 1, 3, 4)
b, f_times_2, c, h, w = out.shape
out = out.reshape(b, f_times_2 // 2, 2 * c, h, w)
out = out.permute(0, 2, 1, 3, 4).contiguous()
if self.refiner_vae:
out = self.regul(out)[0]
out = torch.cat((out[:, :, :1], out), dim=2)
out = out.permute(0, 2, 1, 3, 4)
b, f_times_2, c, h, w = out.shape
out = out.reshape(b, f_times_2 // 2, 2 * c, h, w)
out = out.permute(0, 2, 1, 3, 4).contiguous()
return out
class Decoder(nn.Module):
def __init__(self, z_channels, out_channels, block_out_channels, num_res_blocks,
ffactor_spatial, ffactor_temporal, upsample_match_channel=True, **_):
ffactor_spatial, ffactor_temporal, upsample_match_channel=True, refiner_vae=True, **_):
super().__init__()
block_out_channels = block_out_channels[::-1]
self.z_channels = z_channels
self.block_out_channels = block_out_channels
self.num_res_blocks = num_res_blocks
self.refiner_vae = refiner_vae
if self.refiner_vae:
conv_op = VideoConv3d
norm_op = RMS_norm
else:
conv_op = ops.Conv3d
norm_op = Normalize
ch = block_out_channels[0]
self.conv_in = VideoConv3d(z_channels, ch, 3)
self.conv_in = conv_op(z_channels, ch, kernel_size=3, stride=1, padding=1)
self.mid = nn.Module()
self.mid.block_1 = ResnetBlock(in_channels=ch, out_channels=ch, temb_channels=0, conv_op=VideoConv3d, norm_op=RMS_norm)
self.mid.attn_1 = AttnBlock(ch, conv_op=ops.Conv3d, norm_op=RMS_norm)
self.mid.block_2 = ResnetBlock(in_channels=ch, out_channels=ch, temb_channels=0, conv_op=VideoConv3d, norm_op=RMS_norm)
self.mid.block_1 = ResnetBlock(in_channels=ch, out_channels=ch, temb_channels=0, conv_op=conv_op, norm_op=norm_op)
self.mid.attn_1 = AttnBlock(ch, conv_op=ops.Conv3d, norm_op=norm_op)
self.mid.block_2 = ResnetBlock(in_channels=ch, out_channels=ch, temb_channels=0, conv_op=conv_op, norm_op=norm_op)
self.up = nn.ModuleList()
depth = (ffactor_spatial >> 1).bit_length()
@ -238,25 +265,26 @@ class Decoder(nn.Module):
stage.block = nn.ModuleList([ResnetBlock(in_channels=ch if j == 0 else tgt,
out_channels=tgt,
temb_channels=0,
conv_op=VideoConv3d, norm_op=RMS_norm)
conv_op=conv_op, norm_op=norm_op)
for j in range(num_res_blocks + 1)])
ch = tgt
if i < depth:
nxt = block_out_channels[i + 1] if i + 1 < len(block_out_channels) and upsample_match_channel else ch
stage.upsample = UpSmpl(ch, nxt, tus=i < depth_temporal)
stage.upsample = UpSmpl(ch, nxt, tus=i < depth_temporal, refiner_vae=self.refiner_vae, op=conv_op)
ch = nxt
self.up.append(stage)
self.norm_out = RMS_norm(ch)
self.conv_out = VideoConv3d(ch, out_channels, 3)
self.norm_out = norm_op(ch)
self.conv_out = conv_op(ch, out_channels, 3, stride=1, padding=1)
def forward(self, z):
z = z.permute(0, 2, 1, 3, 4)
b, f, c, h, w = z.shape
z = z.reshape(b, f, 2, c // 2, h, w)
z = z.permute(0, 1, 2, 3, 4, 5).reshape(b, f * 2, c // 2, h, w)
z = z.permute(0, 2, 1, 3, 4)
z = z[:, :, 1:]
if self.refiner_vae:
z = z.permute(0, 2, 1, 3, 4)
b, f, c, h, w = z.shape
z = z.reshape(b, f, 2, c // 2, h, w)
z = z.permute(0, 1, 2, 3, 4, 5).reshape(b, f * 2, c // 2, h, w)
z = z.permute(0, 2, 1, 3, 4)
z = z[:, :, 1:]
x = self.conv_in(z) + z.repeat_interleave(self.block_out_channels[0] // self.z_channels, 1)
x = self.mid.block_2(self.mid.attn_1(self.mid.block_1(x)))
@ -267,4 +295,10 @@ class Decoder(nn.Module):
if hasattr(stage, 'upsample'):
x = stage.upsample(x)
return self.conv_out(F.silu(self.norm_out(x)))
out = self.conv_out(F.silu(self.norm_out(x)))
if not self.refiner_vae:
if z.shape[-3] == 1:
out = out[:, :, -1:]
return out

View File

@ -0,0 +1,120 @@
# Implementation adapted from https://github.com/EdwardDixon/snake under the MIT license.
# LICENSE is in incl_licenses directory.
import torch
from torch import nn, sin, pow
from torch.nn import Parameter
import comfy.model_management
class Snake(nn.Module):
'''
Implementation of a sine-based periodic activation function
Shape:
- Input: (B, C, T)
- Output: (B, C, T), same shape as the input
Parameters:
- alpha - trainable parameter
References:
- This activation function is from this paper by Liu Ziyin, Tilman Hartwig, Masahito Ueda:
https://arxiv.org/abs/2006.08195
Examples:
>>> a1 = Snake(256)
>>> x = torch.randn(256)
>>> x = a1(x)
'''
def __init__(self, in_features, alpha=1.0, alpha_trainable=True, alpha_logscale=False):
'''
Initialization.
INPUT:
- in_features: shape of the input
- alpha: trainable parameter
alpha is initialized to 1 by default, higher values = higher-frequency.
alpha will be trained along with the rest of your model.
'''
super(Snake, self).__init__()
self.in_features = in_features
# initialize alpha
self.alpha_logscale = alpha_logscale
if self.alpha_logscale:
self.alpha = Parameter(torch.empty(in_features))
else:
self.alpha = Parameter(torch.empty(in_features))
self.alpha.requires_grad = alpha_trainable
self.no_div_by_zero = 0.000000001
def forward(self, x):
'''
Forward pass of the function.
Applies the function to the input elementwise.
Snake = x + 1/a * sin^2 (xa)
'''
alpha = comfy.model_management.cast_to(self.alpha, dtype=x.dtype, device=x.device).unsqueeze(0).unsqueeze(-1) # line up with x to [B, C, T]
if self.alpha_logscale:
alpha = torch.exp(alpha)
x = x + (1.0 / (alpha + self.no_div_by_zero)) * pow(sin(x * alpha), 2)
return x
class SnakeBeta(nn.Module):
'''
A modified Snake function which uses separate parameters for the magnitude of the periodic components
Shape:
- Input: (B, C, T)
- Output: (B, C, T), same shape as the input
Parameters:
- alpha - trainable parameter that controls frequency
- beta - trainable parameter that controls magnitude
References:
- This activation function is a modified version based on this paper by Liu Ziyin, Tilman Hartwig, Masahito Ueda:
https://arxiv.org/abs/2006.08195
Examples:
>>> a1 = SnakeBeta(256)
>>> x = torch.randn(256)
>>> x = a1(x)
'''
def __init__(self, in_features, alpha=1.0, alpha_trainable=True, alpha_logscale=False):
'''
Initialization.
INPUT:
- in_features: shape of the input
- alpha - trainable parameter that controls frequency
- beta - trainable parameter that controls magnitude
alpha is initialized to 1 by default, higher values = higher-frequency.
beta is initialized to 1 by default, higher values = higher-magnitude.
alpha will be trained along with the rest of your model.
'''
super(SnakeBeta, self).__init__()
self.in_features = in_features
# initialize alpha
self.alpha_logscale = alpha_logscale
if self.alpha_logscale:
self.alpha = Parameter(torch.empty(in_features))
self.beta = Parameter(torch.empty(in_features))
else:
self.alpha = Parameter(torch.empty(in_features))
self.beta = Parameter(torch.empty(in_features))
self.alpha.requires_grad = alpha_trainable
self.beta.requires_grad = alpha_trainable
self.no_div_by_zero = 0.000000001
def forward(self, x):
'''
Forward pass of the function.
Applies the function to the input elementwise.
SnakeBeta = x + 1/b * sin^2 (xa)
'''
alpha = comfy.model_management.cast_to(self.alpha, dtype=x.dtype, device=x.device).unsqueeze(0).unsqueeze(-1) # line up with x to [B, C, T]
beta = comfy.model_management.cast_to(self.beta, dtype=x.dtype, device=x.device).unsqueeze(0).unsqueeze(-1)
if self.alpha_logscale:
alpha = torch.exp(alpha)
beta = torch.exp(beta)
x = x + (1.0 / (beta + self.no_div_by_zero)) * pow(sin(x * alpha), 2)
return x
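A small sanity check that the forward pass above matches the documented formula. It assumes it runs in the same module (so Snake and comfy.model_management are in scope) and sets alpha explicitly, because the parameters are created with torch.empty and ship uninitialized:
import torch

snake = Snake(8, alpha_logscale=False)
with torch.no_grad():
    snake.alpha.fill_(1.0)           # pick a concrete alpha for the check
x = torch.randn(2, 8, 16)            # [B, C, T]
expected = x + torch.sin(x * 1.0) ** 2 / (1.0 + snake.no_div_by_zero)
assert torch.allclose(snake(x), expected)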

View File

@ -0,0 +1,157 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
import comfy.model_management
if 'sinc' in dir(torch):
sinc = torch.sinc
else:
# This code is adopted from adefossez's julius.core.sinc under the MIT License
# https://adefossez.github.io/julius/julius/core.html
# LICENSE is in incl_licenses directory.
def sinc(x: torch.Tensor):
"""
Implementation of sinc, i.e. sin(pi * x) / (pi * x)
__Warning__: Different to julius.sinc, the input is multiplied by `pi`!
"""
return torch.where(x == 0,
torch.tensor(1., device=x.device, dtype=x.dtype),
torch.sin(math.pi * x) / math.pi / x)
# This code is adopted from adefossez's julius.lowpass.LowPassFilters under the MIT License
# https://adefossez.github.io/julius/julius/lowpass.html
# LICENSE is in incl_licenses directory.
def kaiser_sinc_filter1d(cutoff, half_width, kernel_size): # return filter [1,1,kernel_size]
even = (kernel_size % 2 == 0)
half_size = kernel_size // 2
#For kaiser window
delta_f = 4 * half_width
A = 2.285 * (half_size - 1) * math.pi * delta_f + 7.95
if A > 50.:
beta = 0.1102 * (A - 8.7)
elif A >= 21.:
beta = 0.5842 * (A - 21)**0.4 + 0.07886 * (A - 21.)
else:
beta = 0.
window = torch.kaiser_window(kernel_size, beta=beta, periodic=False)
# ratio = 0.5/cutoff -> 2 * cutoff = 1 / ratio
if even:
time = (torch.arange(-half_size, half_size) + 0.5)
else:
time = torch.arange(kernel_size) - half_size
if cutoff == 0:
filter_ = torch.zeros_like(time)
else:
filter_ = 2 * cutoff * window * sinc(2 * cutoff * time)
# Normalize filter to have sum = 1, otherwise we will have a small leakage
# of the constant component in the input signal.
filter_ /= filter_.sum()
filter = filter_.view(1, 1, kernel_size)
return filter
class LowPassFilter1d(nn.Module):
def __init__(self,
cutoff=0.5,
half_width=0.6,
stride: int = 1,
padding: bool = True,
padding_mode: str = 'replicate',
kernel_size: int = 12):
# kernel_size should be even number for stylegan3 setup,
# in this implementation, odd number is also possible.
super().__init__()
if cutoff < -0.:
raise ValueError("Minimum cutoff must be larger than zero.")
if cutoff > 0.5:
raise ValueError("A cutoff above 0.5 does not make sense.")
self.kernel_size = kernel_size
self.even = (kernel_size % 2 == 0)
self.pad_left = kernel_size // 2 - int(self.even)
self.pad_right = kernel_size // 2
self.stride = stride
self.padding = padding
self.padding_mode = padding_mode
filter = kaiser_sinc_filter1d(cutoff, half_width, kernel_size)
self.register_buffer("filter", filter)
#input [B, C, T]
def forward(self, x):
_, C, _ = x.shape
if self.padding:
x = F.pad(x, (self.pad_left, self.pad_right),
mode=self.padding_mode)
out = F.conv1d(x, comfy.model_management.cast_to(self.filter.expand(C, -1, -1), dtype=x.dtype, device=x.device),
stride=self.stride, groups=C)
return out
class UpSample1d(nn.Module):
def __init__(self, ratio=2, kernel_size=None):
super().__init__()
self.ratio = ratio
self.kernel_size = int(6 * ratio // 2) * 2 if kernel_size is None else kernel_size
self.stride = ratio
self.pad = self.kernel_size // ratio - 1
self.pad_left = self.pad * self.stride + (self.kernel_size - self.stride) // 2
self.pad_right = self.pad * self.stride + (self.kernel_size - self.stride + 1) // 2
filter = kaiser_sinc_filter1d(cutoff=0.5 / ratio,
half_width=0.6 / ratio,
kernel_size=self.kernel_size)
self.register_buffer("filter", filter)
# x: [B, C, T]
def forward(self, x):
_, C, _ = x.shape
x = F.pad(x, (self.pad, self.pad), mode='replicate')
x = self.ratio * F.conv_transpose1d(
x, comfy.model_management.cast_to(self.filter.expand(C, -1, -1), dtype=x.dtype, device=x.device), stride=self.stride, groups=C)
x = x[..., self.pad_left:-self.pad_right]
return x
class DownSample1d(nn.Module):
def __init__(self, ratio=2, kernel_size=None):
super().__init__()
self.ratio = ratio
self.kernel_size = int(6 * ratio // 2) * 2 if kernel_size is None else kernel_size
self.lowpass = LowPassFilter1d(cutoff=0.5 / ratio,
half_width=0.6 / ratio,
stride=ratio,
kernel_size=self.kernel_size)
def forward(self, x):
xx = self.lowpass(x)
return xx
class Activation1d(nn.Module):
def __init__(self,
activation,
up_ratio: int = 2,
down_ratio: int = 2,
up_kernel_size: int = 12,
down_kernel_size: int = 12):
super().__init__()
self.up_ratio = up_ratio
self.down_ratio = down_ratio
self.act = activation
self.upsample = UpSample1d(up_ratio, up_kernel_size)
self.downsample = DownSample1d(down_ratio, down_kernel_size)
# x: [B,C,T]
def forward(self, x):
x = self.upsample(x)
x = self.act(x)
x = self.downsample(x)
return x
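A minimal shape check for Activation1d, assuming it runs in the same module so the classes above are in scope; torch.nn.Tanh stands in for the Snake activations that BigVGAN actually wraps:
import torch

act = Activation1d(activation=torch.nn.Tanh())
x = torch.randn(1, 4, 160)    # [B, C, T]
y = act(x)
assert y.shape == x.shape     # upsample -> activation -> downsample preserves the length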

View File

@ -0,0 +1,156 @@
from typing import Literal
import torch
import torch.nn as nn
from .distributions import DiagonalGaussianDistribution
from .vae import VAE_16k
from .bigvgan import BigVGANVocoder
import logging
try:
import torchaudio
except:
logging.warning("torchaudio missing, MMAudio VAE model will be broken")
def dynamic_range_compression_torch(x, C=1, clip_val=1e-5, *, norm_fn):
return norm_fn(torch.clamp(x, min=clip_val) * C)
def spectral_normalize_torch(magnitudes, norm_fn):
output = dynamic_range_compression_torch(magnitudes, norm_fn=norm_fn)
return output
class MelConverter(nn.Module):
def __init__(
self,
*,
sampling_rate: float,
n_fft: int,
num_mels: int,
hop_size: int,
win_size: int,
fmin: float,
fmax: float,
norm_fn,
):
super().__init__()
self.sampling_rate = sampling_rate
self.n_fft = n_fft
self.num_mels = num_mels
self.hop_size = hop_size
self.win_size = win_size
self.fmin = fmin
self.fmax = fmax
self.norm_fn = norm_fn
# mel = librosa_mel_fn(sr=self.sampling_rate,
# n_fft=self.n_fft,
# n_mels=self.num_mels,
# fmin=self.fmin,
# fmax=self.fmax)
# mel_basis = torch.from_numpy(mel).float()
mel_basis = torch.empty((num_mels, 1 + n_fft // 2))
hann_window = torch.hann_window(self.win_size)
self.register_buffer('mel_basis', mel_basis)
self.register_buffer('hann_window', hann_window)
@property
def device(self):
return self.mel_basis.device
def forward(self, waveform: torch.Tensor, center: bool = False) -> torch.Tensor:
waveform = waveform.clamp(min=-1., max=1.).to(self.device)
waveform = torch.nn.functional.pad(
waveform.unsqueeze(1),
[int((self.n_fft - self.hop_size) / 2),
int((self.n_fft - self.hop_size) / 2)],
mode='reflect')
waveform = waveform.squeeze(1)
spec = torch.stft(waveform,
self.n_fft,
hop_length=self.hop_size,
win_length=self.win_size,
window=self.hann_window,
center=center,
pad_mode='reflect',
normalized=False,
onesided=True,
return_complex=True)
spec = torch.view_as_real(spec)
spec = torch.sqrt(spec.pow(2).sum(-1) + (1e-9))
spec = torch.matmul(self.mel_basis, spec)
spec = spectral_normalize_torch(spec, self.norm_fn)
return spec
class AudioAutoencoder(nn.Module):
def __init__(
self,
*,
# ckpt_path: str,
mode: Literal['16k', '44k'] = '16k',
need_vae_encoder: bool = True,
):
super().__init__()
assert mode == "16k", "Only 16k mode is supported currently."
self.mel_converter = MelConverter(sampling_rate=16_000,
n_fft=1024,
num_mels=80,
hop_size=256,
win_size=1024,
fmin=0,
fmax=8_000,
norm_fn=torch.log10)
self.vae = VAE_16k().eval()
bigvgan_config = {
"resblock": "1",
"num_mels": 80,
"upsample_rates": [4, 4, 2, 2, 2, 2],
"upsample_kernel_sizes": [8, 8, 4, 4, 4, 4],
"upsample_initial_channel": 1536,
"resblock_kernel_sizes": [3, 7, 11],
"resblock_dilation_sizes": [
[1, 3, 5],
[1, 3, 5],
[1, 3, 5],
],
"activation": "snakebeta",
"snake_logscale": True,
}
self.vocoder = BigVGANVocoder(
bigvgan_config
).eval()
@torch.inference_mode()
def encode_audio(self, x) -> DiagonalGaussianDistribution:
# x: (B * L)
mel = self.mel_converter(x)
dist = self.vae.encode(mel)
return dist
@torch.no_grad()
def decode(self, z):
mel_decoded = self.vae.decode(z)
audio = self.vocoder(mel_decoded)
audio = torchaudio.functional.resample(audio, 16000, 44100)
return audio
@torch.no_grad()
def encode(self, audio):
audio = audio.mean(dim=1)
audio = torchaudio.functional.resample(audio, 44100, 16000)
dist = self.encode_audio(audio)
return dist.mean

View File

@ -0,0 +1,219 @@
# Copyright (c) 2022 NVIDIA CORPORATION.
# Licensed under the MIT license.
# Adapted from https://github.com/jik876/hifi-gan under the MIT license.
# LICENSE is in incl_licenses directory.
import torch
import torch.nn as nn
from types import SimpleNamespace
from . import activations
from .alias_free_torch import Activation1d
import comfy.ops
ops = comfy.ops.disable_weight_init
def get_padding(kernel_size, dilation=1):
return int((kernel_size * dilation - dilation) / 2)
class AMPBlock1(torch.nn.Module):
def __init__(self, h, channels, kernel_size=3, dilation=(1, 3, 5), activation=None):
super(AMPBlock1, self).__init__()
self.h = h
self.convs1 = nn.ModuleList([
ops.Conv1d(channels,
channels,
kernel_size,
1,
dilation=dilation[0],
padding=get_padding(kernel_size, dilation[0])),
ops.Conv1d(channels,
channels,
kernel_size,
1,
dilation=dilation[1],
padding=get_padding(kernel_size, dilation[1])),
ops.Conv1d(channels,
channels,
kernel_size,
1,
dilation=dilation[2],
padding=get_padding(kernel_size, dilation[2]))
])
self.convs2 = nn.ModuleList([
ops.Conv1d(channels,
channels,
kernel_size,
1,
dilation=1,
padding=get_padding(kernel_size, 1)),
ops.Conv1d(channels,
channels,
kernel_size,
1,
dilation=1,
padding=get_padding(kernel_size, 1)),
ops.Conv1d(channels,
channels,
kernel_size,
1,
dilation=1,
padding=get_padding(kernel_size, 1))
])
self.num_layers = len(self.convs1) + len(self.convs2) # total number of conv layers
if activation == 'snake': # periodic nonlinearity with snake function and anti-aliasing
self.activations = nn.ModuleList([
Activation1d(
activation=activations.Snake(channels, alpha_logscale=h.snake_logscale))
for _ in range(self.num_layers)
])
elif activation == 'snakebeta': # periodic nonlinearity with snakebeta function and anti-aliasing
self.activations = nn.ModuleList([
Activation1d(
activation=activations.SnakeBeta(channels, alpha_logscale=h.snake_logscale))
for _ in range(self.num_layers)
])
else:
raise NotImplementedError(
"activation incorrectly specified. check the config file and look for 'activation'."
)
def forward(self, x):
acts1, acts2 = self.activations[::2], self.activations[1::2]
for c1, c2, a1, a2 in zip(self.convs1, self.convs2, acts1, acts2):
xt = a1(x)
xt = c1(xt)
xt = a2(xt)
xt = c2(xt)
x = xt + x
return x
class AMPBlock2(torch.nn.Module):
def __init__(self, h, channels, kernel_size=3, dilation=(1, 3), activation=None):
super(AMPBlock2, self).__init__()
self.h = h
self.convs = nn.ModuleList([
ops.Conv1d(channels,
channels,
kernel_size,
1,
dilation=dilation[0],
padding=get_padding(kernel_size, dilation[0])),
ops.Conv1d(channels,
channels,
kernel_size,
1,
dilation=dilation[1],
padding=get_padding(kernel_size, dilation[1]))
])
self.num_layers = len(self.convs) # total number of conv layers
if activation == 'snake': # periodic nonlinearity with snake function and anti-aliasing
self.activations = nn.ModuleList([
Activation1d(
activation=activations.Snake(channels, alpha_logscale=h.snake_logscale))
for _ in range(self.num_layers)
])
elif activation == 'snakebeta': # periodic nonlinearity with snakebeta function and anti-aliasing
self.activations = nn.ModuleList([
Activation1d(
activation=activations.SnakeBeta(channels, alpha_logscale=h.snake_logscale))
for _ in range(self.num_layers)
])
else:
raise NotImplementedError(
"activation incorrectly specified. check the config file and look for 'activation'."
)
def forward(self, x):
for c, a in zip(self.convs, self.activations):
xt = a(x)
xt = c(xt)
x = xt + x
return x
class BigVGANVocoder(torch.nn.Module):
# this is our main BigVGAN model. Applies anti-aliased periodic activation for resblocks.
def __init__(self, h):
super().__init__()
if isinstance(h, dict):
h = SimpleNamespace(**h)
self.h = h
self.num_kernels = len(h.resblock_kernel_sizes)
self.num_upsamples = len(h.upsample_rates)
# pre conv
self.conv_pre = ops.Conv1d(h.num_mels, h.upsample_initial_channel, 7, 1, padding=3)
# define which AMPBlock to use. BigVGAN uses AMPBlock1 as default
resblock = AMPBlock1 if h.resblock == '1' else AMPBlock2
# transposed conv-based upsamplers. does not apply anti-aliasing
self.ups = nn.ModuleList()
for i, (u, k) in enumerate(zip(h.upsample_rates, h.upsample_kernel_sizes)):
self.ups.append(
nn.ModuleList([
ops.ConvTranspose1d(h.upsample_initial_channel // (2**i),
h.upsample_initial_channel // (2**(i + 1)),
k,
u,
padding=(k - u) // 2)
]))
# residual blocks using anti-aliased multi-periodicity composition modules (AMP)
self.resblocks = nn.ModuleList()
for i in range(len(self.ups)):
ch = h.upsample_initial_channel // (2**(i + 1))
for j, (k, d) in enumerate(zip(h.resblock_kernel_sizes, h.resblock_dilation_sizes)):
self.resblocks.append(resblock(h, ch, k, d, activation=h.activation))
# post conv
if h.activation == "snake": # periodic nonlinearity with snake function and anti-aliasing
activation_post = activations.Snake(ch, alpha_logscale=h.snake_logscale)
self.activation_post = Activation1d(activation=activation_post)
elif h.activation == "snakebeta": # periodic nonlinearity with snakebeta function and anti-aliasing
activation_post = activations.SnakeBeta(ch, alpha_logscale=h.snake_logscale)
self.activation_post = Activation1d(activation=activation_post)
else:
raise NotImplementedError(
"activation incorrectly specified. check the config file and look for 'activation'."
)
self.conv_post = ops.Conv1d(ch, 1, 7, 1, padding=3)
def forward(self, x):
# pre conv
x = self.conv_pre(x)
for i in range(self.num_upsamples):
# upsampling
for i_up in range(len(self.ups[i])):
x = self.ups[i][i_up](x)
# AMP blocks
xs = None
for j in range(self.num_kernels):
if xs is None:
xs = self.resblocks[i * self.num_kernels + j](x)
else:
xs += self.resblocks[i * self.num_kernels + j](x)
x = xs / self.num_kernels
# post conv
x = self.activation_post(x)
x = self.conv_post(x)
x = torch.tanh(x)
return x

View File

@ -0,0 +1,92 @@
import torch
import numpy as np
class AbstractDistribution:
def sample(self):
raise NotImplementedError()
def mode(self):
raise NotImplementedError()
class DiracDistribution(AbstractDistribution):
def __init__(self, value):
self.value = value
def sample(self):
return self.value
def mode(self):
return self.value
class DiagonalGaussianDistribution(object):
def __init__(self, parameters, deterministic=False):
self.parameters = parameters
self.mean, self.logvar = torch.chunk(parameters, 2, dim=1)
self.logvar = torch.clamp(self.logvar, -30.0, 20.0)
self.deterministic = deterministic
self.std = torch.exp(0.5 * self.logvar)
self.var = torch.exp(self.logvar)
if self.deterministic:
self.var = self.std = torch.zeros_like(self.mean, device=self.parameters.device)
def sample(self):
x = self.mean + self.std * torch.randn(self.mean.shape, device=self.parameters.device)
return x
def kl(self, other=None):
if self.deterministic:
return torch.Tensor([0.])
else:
if other is None:
return 0.5 * torch.sum(torch.pow(self.mean, 2)
+ self.var - 1.0 - self.logvar,
dim=[1, 2, 3])
else:
return 0.5 * torch.sum(
torch.pow(self.mean - other.mean, 2) / other.var
+ self.var / other.var - 1.0 - self.logvar + other.logvar,
dim=[1, 2, 3])
def nll(self, sample, dims=[1,2,3]):
if self.deterministic:
return torch.Tensor([0.])
logtwopi = np.log(2.0 * np.pi)
return 0.5 * torch.sum(
logtwopi + self.logvar + torch.pow(sample - self.mean, 2) / self.var,
dim=dims)
def mode(self):
return self.mean
def normal_kl(mean1, logvar1, mean2, logvar2):
"""
source: https://github.com/openai/guided-diffusion/blob/27c20a8fab9cb472df5d6bdd6c8d11c8f430b924/guided_diffusion/losses.py#L12
Compute the KL divergence between two gaussians.
Shapes are automatically broadcasted, so batches can be compared to
scalars, among other use cases.
"""
tensor = None
for obj in (mean1, logvar1, mean2, logvar2):
if isinstance(obj, torch.Tensor):
tensor = obj
break
assert tensor is not None, "at least one argument must be a Tensor"
# Force variances to be Tensors. Broadcasting helps convert scalars to
# Tensors, but it does not work for torch.exp().
logvar1, logvar2 = [
x if isinstance(x, torch.Tensor) else torch.tensor(x).to(tensor)
for x in (logvar1, logvar2)
]
return 0.5 * (
-1.0
+ logvar2
- logvar1
+ torch.exp(logvar1 - logvar2)
+ ((mean1 - mean2) ** 2) * torch.exp(-logvar2)
)
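Both DiagonalGaussianDistribution.kl() (against the standard normal when other is None) and normal_kl() above are instances of the closed-form KL divergence between Gaussians, applied per dimension and summed over the non-batch dimensions:
D_{\mathrm{KL}}\big(\mathcal{N}(\mu_1, \sigma_1^2)\,\|\,\mathcal{N}(\mu_2, \sigma_2^2)\big) = \tfrac{1}{2}\Big(\log\tfrac{\sigma_2^2}{\sigma_1^2} + \tfrac{\sigma_1^2 + (\mu_1 - \mu_2)^2}{\sigma_2^2} - 1\Big)
With \mu_2 = 0 and \sigma_2 = 1 this reduces to \tfrac{1}{2}\big(\mu_1^2 + \sigma_1^2 - 1 - \log\sigma_1^2\big), which is exactly the other=None branch of kl().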

View File

@ -0,0 +1,358 @@
import logging
from typing import Optional
import torch
import torch.nn as nn
from .vae_modules import (AttnBlock1D, Downsample1D, ResnetBlock1D,
Upsample1D, nonlinearity)
from .distributions import DiagonalGaussianDistribution
import comfy.model_management
import comfy.ops
ops = comfy.ops.disable_weight_init
log = logging.getLogger()
DATA_MEAN_80D = [
-1.6058, -1.3676, -1.2520, -1.2453, -1.2078, -1.2224, -1.2419, -1.2439, -1.2922, -1.2927,
-1.3170, -1.3543, -1.3401, -1.3836, -1.3907, -1.3912, -1.4313, -1.4152, -1.4527, -1.4728,
-1.4568, -1.5101, -1.5051, -1.5172, -1.5623, -1.5373, -1.5746, -1.5687, -1.6032, -1.6131,
-1.6081, -1.6331, -1.6489, -1.6489, -1.6700, -1.6738, -1.6953, -1.6969, -1.7048, -1.7280,
-1.7361, -1.7495, -1.7658, -1.7814, -1.7889, -1.8064, -1.8221, -1.8377, -1.8417, -1.8643,
-1.8857, -1.8929, -1.9173, -1.9379, -1.9531, -1.9673, -1.9824, -2.0042, -2.0215, -2.0436,
-2.0766, -2.1064, -2.1418, -2.1855, -2.2319, -2.2767, -2.3161, -2.3572, -2.3954, -2.4282,
-2.4659, -2.5072, -2.5552, -2.6074, -2.6584, -2.7107, -2.7634, -2.8266, -2.8981, -2.9673
]
DATA_STD_80D = [
1.0291, 1.0411, 1.0043, 0.9820, 0.9677, 0.9543, 0.9450, 0.9392, 0.9343, 0.9297, 0.9276, 0.9263,
0.9242, 0.9254, 0.9232, 0.9281, 0.9263, 0.9315, 0.9274, 0.9247, 0.9277, 0.9199, 0.9188, 0.9194,
0.9160, 0.9161, 0.9146, 0.9161, 0.9100, 0.9095, 0.9145, 0.9076, 0.9066, 0.9095, 0.9032, 0.9043,
0.9038, 0.9011, 0.9019, 0.9010, 0.8984, 0.8983, 0.8986, 0.8961, 0.8962, 0.8978, 0.8962, 0.8973,
0.8993, 0.8976, 0.8995, 0.9016, 0.8982, 0.8972, 0.8974, 0.8949, 0.8940, 0.8947, 0.8936, 0.8939,
0.8951, 0.8956, 0.9017, 0.9167, 0.9436, 0.9690, 1.0003, 1.0225, 1.0381, 1.0491, 1.0545, 1.0604,
1.0761, 1.0929, 1.1089, 1.1196, 1.1176, 1.1156, 1.1117, 1.1070
]
DATA_MEAN_128D = [
-3.3462, -2.6723, -2.4893, -2.3143, -2.2664, -2.3317, -2.1802, -2.4006, -2.2357, -2.4597,
-2.3717, -2.4690, -2.5142, -2.4919, -2.6610, -2.5047, -2.7483, -2.5926, -2.7462, -2.7033,
-2.7386, -2.8112, -2.7502, -2.9594, -2.7473, -3.0035, -2.8891, -2.9922, -2.9856, -3.0157,
-3.1191, -2.9893, -3.1718, -3.0745, -3.1879, -3.2310, -3.1424, -3.2296, -3.2791, -3.2782,
-3.2756, -3.3134, -3.3509, -3.3750, -3.3951, -3.3698, -3.4505, -3.4509, -3.5089, -3.4647,
-3.5536, -3.5788, -3.5867, -3.6036, -3.6400, -3.6747, -3.7072, -3.7279, -3.7283, -3.7795,
-3.8259, -3.8447, -3.8663, -3.9182, -3.9605, -3.9861, -4.0105, -4.0373, -4.0762, -4.1121,
-4.1488, -4.1874, -4.2461, -4.3170, -4.3639, -4.4452, -4.5282, -4.6297, -4.7019, -4.7960,
-4.8700, -4.9507, -5.0303, -5.0866, -5.1634, -5.2342, -5.3242, -5.4053, -5.4927, -5.5712,
-5.6464, -5.7052, -5.7619, -5.8410, -5.9188, -6.0103, -6.0955, -6.1673, -6.2362, -6.3120,
-6.3926, -6.4797, -6.5565, -6.6511, -6.8130, -6.9961, -7.1275, -7.2457, -7.3576, -7.4663,
-7.6136, -7.7469, -7.8815, -8.0132, -8.1515, -8.3071, -8.4722, -8.7418, -9.3975, -9.6628,
-9.7671, -9.8863, -9.9992, -10.0860, -10.1709, -10.5418, -11.2795, -11.3861
]
DATA_STD_128D = [
2.3804, 2.4368, 2.3772, 2.3145, 2.2803, 2.2510, 2.2316, 2.2083, 2.1996, 2.1835, 2.1769, 2.1659,
2.1631, 2.1618, 2.1540, 2.1606, 2.1571, 2.1567, 2.1612, 2.1579, 2.1679, 2.1683, 2.1634, 2.1557,
2.1668, 2.1518, 2.1415, 2.1449, 2.1406, 2.1350, 2.1313, 2.1415, 2.1281, 2.1352, 2.1219, 2.1182,
2.1327, 2.1195, 2.1137, 2.1080, 2.1179, 2.1036, 2.1087, 2.1036, 2.1015, 2.1068, 2.0975, 2.0991,
2.0902, 2.1015, 2.0857, 2.0920, 2.0893, 2.0897, 2.0910, 2.0881, 2.0925, 2.0873, 2.0960, 2.0900,
2.0957, 2.0958, 2.0978, 2.0936, 2.0886, 2.0905, 2.0845, 2.0855, 2.0796, 2.0840, 2.0813, 2.0817,
2.0838, 2.0840, 2.0917, 2.1061, 2.1431, 2.1976, 2.2482, 2.3055, 2.3700, 2.4088, 2.4372, 2.4609,
2.4731, 2.4847, 2.5072, 2.5451, 2.5772, 2.6147, 2.6529, 2.6596, 2.6645, 2.6726, 2.6803, 2.6812,
2.6899, 2.6916, 2.6931, 2.6998, 2.7062, 2.7262, 2.7222, 2.7158, 2.7041, 2.7485, 2.7491, 2.7451,
2.7485, 2.7233, 2.7297, 2.7233, 2.7145, 2.6958, 2.6788, 2.6439, 2.6007, 2.4786, 2.2469, 2.1877,
2.1392, 2.0717, 2.0107, 1.9676, 1.9140, 1.7102, 0.9101, 0.7164
]
class VAE(nn.Module):
def __init__(
self,
*,
data_dim: int,
embed_dim: int,
hidden_dim: int,
):
super().__init__()
if data_dim == 80:
self.data_mean = nn.Buffer(torch.tensor(DATA_MEAN_80D, dtype=torch.float32))
self.data_std = nn.Buffer(torch.tensor(DATA_STD_80D, dtype=torch.float32))
elif data_dim == 128:
self.data_mean = nn.Buffer(torch.tensor(DATA_MEAN_128D, dtype=torch.float32))
self.data_std = nn.Buffer(torch.tensor(DATA_STD_128D, dtype=torch.float32))
self.data_mean = self.data_mean.view(1, -1, 1)
self.data_std = self.data_std.view(1, -1, 1)
self.encoder = Encoder1D(
dim=hidden_dim,
ch_mult=(1, 2, 4),
num_res_blocks=2,
attn_layers=[3],
down_layers=[0],
in_dim=data_dim,
embed_dim=embed_dim,
)
self.decoder = Decoder1D(
dim=hidden_dim,
ch_mult=(1, 2, 4),
num_res_blocks=2,
attn_layers=[3],
down_layers=[0],
in_dim=data_dim,
out_dim=data_dim,
embed_dim=embed_dim,
)
self.embed_dim = embed_dim
# self.quant_conv = nn.Conv1d(2 * embed_dim, 2 * embed_dim, 1)
# self.post_quant_conv = nn.Conv1d(embed_dim, embed_dim, 1)
self.initialize_weights()
def initialize_weights(self):
pass
def encode(self, x: torch.Tensor, normalize: bool = True) -> DiagonalGaussianDistribution:
if normalize:
x = self.normalize(x)
moments = self.encoder(x)
posterior = DiagonalGaussianDistribution(moments)
return posterior
def decode(self, z: torch.Tensor, unnormalize: bool = True) -> torch.Tensor:
dec = self.decoder(z)
if unnormalize:
dec = self.unnormalize(dec)
return dec
def normalize(self, x: torch.Tensor) -> torch.Tensor:
return (x - comfy.model_management.cast_to(self.data_mean, dtype=x.dtype, device=x.device)) / comfy.model_management.cast_to(self.data_std, dtype=x.dtype, device=x.device)
def unnormalize(self, x: torch.Tensor) -> torch.Tensor:
return x * comfy.model_management.cast_to(self.data_std, dtype=x.dtype, device=x.device) + comfy.model_management.cast_to(self.data_mean, dtype=x.dtype, device=x.device)
def forward(
self,
x: torch.Tensor,
sample_posterior: bool = True,
rng: Optional[torch.Generator] = None,
normalize: bool = True,
unnormalize: bool = True,
) -> tuple[torch.Tensor, DiagonalGaussianDistribution]:
posterior = self.encode(x, normalize=normalize)
if sample_posterior:
z = posterior.sample(rng)
else:
z = posterior.mode()
dec = self.decode(z, unnormalize=unnormalize)
return dec, posterior
def load_weights(self, src_dict) -> None:
self.load_state_dict(src_dict, strict=True)
@property
def device(self) -> torch.device:
return next(self.parameters()).device
def get_last_layer(self):
return self.decoder.conv_out.weight
def remove_weight_norm(self):
return self
class Encoder1D(nn.Module):
def __init__(self,
*,
dim: int,
ch_mult: tuple[int] = (1, 2, 4, 8),
num_res_blocks: int,
attn_layers: list[int] = [],
down_layers: list[int] = [],
resamp_with_conv: bool = True,
in_dim: int,
embed_dim: int,
double_z: bool = True,
kernel_size: int = 3,
clip_act: float = 256.0):
super().__init__()
self.dim = dim
self.num_layers = len(ch_mult)
self.num_res_blocks = num_res_blocks
self.in_channels = in_dim
self.clip_act = clip_act
self.down_layers = down_layers
self.attn_layers = attn_layers
self.conv_in = ops.Conv1d(in_dim, self.dim, kernel_size=kernel_size, padding=kernel_size // 2, bias=False)
in_ch_mult = (1, ) + tuple(ch_mult)
self.in_ch_mult = in_ch_mult
# downsampling
self.down = nn.ModuleList()
for i_level in range(self.num_layers):
block = nn.ModuleList()
attn = nn.ModuleList()
block_in = dim * in_ch_mult[i_level]
block_out = dim * ch_mult[i_level]
for i_block in range(self.num_res_blocks):
block.append(
ResnetBlock1D(in_dim=block_in,
out_dim=block_out,
kernel_size=kernel_size,
use_norm=True))
block_in = block_out
if i_level in attn_layers:
attn.append(AttnBlock1D(block_in))
down = nn.Module()
down.block = block
down.attn = attn
if i_level in down_layers:
down.downsample = Downsample1D(block_in, resamp_with_conv)
self.down.append(down)
# middle
self.mid = nn.Module()
self.mid.block_1 = ResnetBlock1D(in_dim=block_in,
out_dim=block_in,
kernel_size=kernel_size,
use_norm=True)
self.mid.attn_1 = AttnBlock1D(block_in)
self.mid.block_2 = ResnetBlock1D(in_dim=block_in,
out_dim=block_in,
kernel_size=kernel_size,
use_norm=True)
# end
self.conv_out = ops.Conv1d(block_in,
2 * embed_dim if double_z else embed_dim,
kernel_size=kernel_size, padding=kernel_size // 2, bias=False)
self.learnable_gain = nn.Parameter(torch.zeros([]))
def forward(self, x):
# downsampling
h = self.conv_in(x)
for i_level in range(self.num_layers):
for i_block in range(self.num_res_blocks):
h = self.down[i_level].block[i_block](h)
if len(self.down[i_level].attn) > 0:
h = self.down[i_level].attn[i_block](h)
h = h.clamp(-self.clip_act, self.clip_act)
if i_level in self.down_layers:
h = self.down[i_level].downsample(h)
# middle
h = self.mid.block_1(h)
h = self.mid.attn_1(h)
h = self.mid.block_2(h)
h = h.clamp(-self.clip_act, self.clip_act)
# end
h = nonlinearity(h)
h = self.conv_out(h) * (self.learnable_gain + 1)
return h
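For orientation, a small shape-check sketch of Encoder1D under an invented configuration; it assumes this module's own imports (torch, ResnetBlock1D, AttnBlock1D, Downsample1D, ops) are available, e.g. inside a ComfyUI checkout:

enc = Encoder1D(dim=64, ch_mult=(1, 2), num_res_blocks=1,
                down_layers=[0], in_dim=80, embed_dim=20)
x = torch.randn(1, 80, 64)     # (batch, mel bins, time)
moments = enc(x)
print(moments.shape)           # torch.Size([1, 40, 32]): 2 * embed_dim channels (double_z), time halved once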
class Decoder1D(nn.Module):
def __init__(self,
*,
dim: int,
out_dim: int,
ch_mult: tuple[int] = (1, 2, 4, 8),
num_res_blocks: int,
attn_layers: list[int] = [],
down_layers: list[int] = [],
kernel_size: int = 3,
resamp_with_conv: bool = True,
in_dim: int,
embed_dim: int,
clip_act: float = 256.0):
super().__init__()
self.ch = dim
self.num_layers = len(ch_mult)
self.num_res_blocks = num_res_blocks
self.in_channels = in_dim
self.clip_act = clip_act
self.down_layers = [i + 1 for i in down_layers]  # shifted by one: each down layer adds one
# compute in_ch_mult, block_in and curr_res at lowest res
block_in = dim * ch_mult[self.num_layers - 1]
# z to block_in
self.conv_in = ops.Conv1d(embed_dim, block_in, kernel_size=kernel_size, padding=kernel_size // 2, bias=False)
# middle
self.mid = nn.Module()
self.mid.block_1 = ResnetBlock1D(in_dim=block_in, out_dim=block_in, use_norm=True)
self.mid.attn_1 = AttnBlock1D(block_in)
self.mid.block_2 = ResnetBlock1D(in_dim=block_in, out_dim=block_in, use_norm=True)
# upsampling
self.up = nn.ModuleList()
for i_level in reversed(range(self.num_layers)):
block = nn.ModuleList()
attn = nn.ModuleList()
block_out = dim * ch_mult[i_level]
for i_block in range(self.num_res_blocks + 1):
block.append(ResnetBlock1D(in_dim=block_in, out_dim=block_out, use_norm=True))
block_in = block_out
if i_level in attn_layers:
attn.append(AttnBlock1D(block_in))
up = nn.Module()
up.block = block
up.attn = attn
if i_level in self.down_layers:
up.upsample = Upsample1D(block_in, resamp_with_conv)
self.up.insert(0, up) # prepend to get consistent order
# end
self.conv_out = ops.Conv1d(block_in, out_dim, kernel_size=kernel_size, padding=kernel_size // 2, bias=False)
self.learnable_gain = nn.Parameter(torch.zeros([]))
def forward(self, z):
# z to block_in
h = self.conv_in(z)
# middle
h = self.mid.block_1(h)
h = self.mid.attn_1(h)
h = self.mid.block_2(h)
h = h.clamp(-self.clip_act, self.clip_act)
# upsampling
for i_level in reversed(range(self.num_layers)):
for i_block in range(self.num_res_blocks + 1):
h = self.up[i_level].block[i_block](h)
if len(self.up[i_level].attn) > 0:
h = self.up[i_level].attn[i_block](h)
h = h.clamp(-self.clip_act, self.clip_act)
if i_level in self.down_layers:
h = self.up[i_level].upsample(h)
h = nonlinearity(h)
h = self.conv_out(h) * (self.learnable_gain + 1)
return h
def VAE_16k(**kwargs) -> VAE:
return VAE(data_dim=80, embed_dim=20, hidden_dim=384, **kwargs)
def VAE_44k(**kwargs) -> VAE:
return VAE(data_dim=128, embed_dim=40, hidden_dim=512, **kwargs)
def get_my_vae(name: str, **kwargs) -> VAE:
if name == '16k':
return VAE_16k(**kwargs)
if name == '44k':
return VAE_44k(**kwargs)
raise ValueError(f'Unknown model: {name}')

View File

@ -0,0 +1,121 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from comfy.ldm.modules.diffusionmodules.model import vae_attention
import math
import comfy.ops
ops = comfy.ops.disable_weight_init
def nonlinearity(x):
# swish
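# 0.596 is roughly the RMS of silu(x) for x ~ N(0, 1), so the activation keeps near-unit variance (magnitude-preserving silu, as in EDM2-style networks)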
return torch.nn.functional.silu(x) / 0.596
def mp_sum(a, b, t=0.5):
return a.lerp(b, t) / math.sqrt((1 - t)**2 + t**2)
def normalize(x, dim=None, eps=1e-4):
if dim is None:
dim = list(range(1, x.ndim))
norm = torch.linalg.vector_norm(x, dim=dim, keepdim=True, dtype=torch.float32)
norm = torch.add(eps, norm, alpha=math.sqrt(norm.numel() / x.numel()))
return x / norm.to(x.dtype)
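mp_sum above is the matching magnitude-preserving sum: it lerps two activations and divides by the RMS of the blend coefficients so that, for independent roughly unit-variance inputs, the result stays roughly unit-variance. A standalone numeric check (nothing assumed beyond torch and math):

import torch, math

a, b, t = torch.randn(1_000_000), torch.randn(1_000_000), 0.3
plain = a.lerp(b, t)
mp = plain / math.sqrt((1 - t) ** 2 + t ** 2)   # same rescale as mp_sum above
print(plain.std())   # ~0.76: a plain lerp of unit-variance inputs shrinks the scale
print(mp.std())      # ~1.0: the rescale restores it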
class ResnetBlock1D(nn.Module):
def __init__(self, *, in_dim, out_dim=None, conv_shortcut=False, kernel_size=3, use_norm=True):
super().__init__()
self.in_dim = in_dim
out_dim = in_dim if out_dim is None else out_dim
self.out_dim = out_dim
self.use_conv_shortcut = conv_shortcut
self.use_norm = use_norm
self.conv1 = ops.Conv1d(in_dim, out_dim, kernel_size=kernel_size, padding=kernel_size // 2, bias=False)
self.conv2 = ops.Conv1d(out_dim, out_dim, kernel_size=kernel_size, padding=kernel_size // 2, bias=False)
if self.in_dim != self.out_dim:
if self.use_conv_shortcut:
self.conv_shortcut = ops.Conv1d(in_dim, out_dim, kernel_size=kernel_size, padding=kernel_size // 2, bias=False)
else:
self.nin_shortcut = ops.Conv1d(in_dim, out_dim, kernel_size=1, padding=0, bias=False)
def forward(self, x: torch.Tensor) -> torch.Tensor:
# pixel norm
if self.use_norm:
x = normalize(x, dim=1)
h = x
h = nonlinearity(h)
h = self.conv1(h)
h = nonlinearity(h)
h = self.conv2(h)
if self.in_dim != self.out_dim:
if self.use_conv_shortcut:
x = self.conv_shortcut(x)
else:
x = self.nin_shortcut(x)
return mp_sum(x, h, t=0.3)
class AttnBlock1D(nn.Module):
def __init__(self, in_channels, num_heads=1):
super().__init__()
self.in_channels = in_channels
self.num_heads = num_heads
self.qkv = ops.Conv1d(in_channels, in_channels * 3, kernel_size=1, padding=0, bias=False)
self.proj_out = ops.Conv1d(in_channels, in_channels, kernel_size=1, padding=0, bias=False)
self.optimized_attention = vae_attention()
def forward(self, x):
h = x
y = self.qkv(h)
y = y.reshape(y.shape[0], -1, 3, y.shape[-1])
q, k, v = normalize(y, dim=1).unbind(2)
h = self.optimized_attention(q, k, v)
h = self.proj_out(h)
return mp_sum(x, h, t=0.3)
class Upsample1D(nn.Module):
def __init__(self, in_channels, with_conv):
super().__init__()
self.with_conv = with_conv
if self.with_conv:
self.conv = ops.Conv1d(in_channels, in_channels, kernel_size=3, padding=1, bias=False)
def forward(self, x):
x = F.interpolate(x, scale_factor=2.0, mode='nearest-exact')  # supports 3D tensors (B, C, T)
if self.with_conv:
x = self.conv(x)
return x
class Downsample1D(nn.Module):
def __init__(self, in_channels, with_conv):
super().__init__()
self.with_conv = with_conv
if self.with_conv:
# no asymmetric padding in torch conv, must do it ourselves
self.conv1 = ops.Conv1d(in_channels, in_channels, kernel_size=1, padding=0, bias=False)
self.conv2 = ops.Conv1d(in_channels, in_channels, kernel_size=1, padding=0, bias=False)
def forward(self, x):
if self.with_conv:
x = self.conv1(x)
x = F.avg_pool1d(x, kernel_size=2, stride=2)
if self.with_conv:
x = self.conv2(x)
return x

View File

@ -251,6 +251,7 @@ class WanAttentionBlock(nn.Module):
freqs, transformer_options=transformer_options)
x = torch.addcmul(x, y, repeat_e(e[2], x))
del y
# cross-attention & ffn
x = x + self.cross_attn(self.norm3(x), context, context_img_len=context_img_len, transformer_options=transformer_options)
@ -939,7 +940,7 @@ class MotionEncoder_tc(nn.Module):
def __init__(self,
in_dim: int,
hidden_dim: int,
num_heads=int,
num_heads: int,
need_global=True,
dtype=None,
device=None,

View File

@ -468,55 +468,46 @@ class WanVAE(nn.Module):
attn_scales, self.temperal_upsample, dropout)
def encode(self, x):
self.clear_cache()
conv_idx = [0]
feat_map = [None] * count_conv3d(self.decoder)
## cache
t = x.shape[2]
iter_ = 1 + (t - 1) // 4
## split the input x to encode along the time axis into chunks of 1, 4, 4, 4, ...
for i in range(iter_):
self._enc_conv_idx = [0]
conv_idx = [0]
if i == 0:
out = self.encoder(
x[:, :, :1, :, :],
feat_cache=self._enc_feat_map,
feat_idx=self._enc_conv_idx)
feat_cache=feat_map,
feat_idx=conv_idx)
else:
out_ = self.encoder(
x[:, :, 1 + 4 * (i - 1):1 + 4 * i, :, :],
feat_cache=self._enc_feat_map,
feat_idx=self._enc_conv_idx)
feat_cache=feat_map,
feat_idx=conv_idx)
out = torch.cat([out, out_], 2)
mu, log_var = self.conv1(out).chunk(2, dim=1)
self.clear_cache()
return mu
def decode(self, z):
self.clear_cache()
conv_idx = [0]
feat_map = [None] * count_conv3d(self.decoder)
# z: [b,c,t,h,w]
iter_ = z.shape[2]
x = self.conv2(z)
for i in range(iter_):
self._conv_idx = [0]
conv_idx = [0]
if i == 0:
out = self.decoder(
x[:, :, i:i + 1, :, :],
feat_cache=self._feat_map,
feat_idx=self._conv_idx)
feat_cache=feat_map,
feat_idx=conv_idx)
else:
out_ = self.decoder(
x[:, :, i:i + 1, :, :],
feat_cache=self._feat_map,
feat_idx=self._conv_idx)
feat_cache=feat_map,
feat_idx=conv_idx)
out = torch.cat([out, out_], 2)
self.clear_cache()
return out
def clear_cache(self):
self._conv_num = count_conv3d(self.decoder)
self._conv_idx = [0]
self._feat_map = [None] * self._conv_num
#cache encode
self._enc_conv_num = count_conv3d(self.encoder)
self._enc_conv_idx = [0]
self._enc_feat_map = [None] * self._enc_conv_num

View File

@ -656,51 +656,51 @@ class WanVAE(nn.Module):
)
def encode(self, x):
self.clear_cache()
conv_idx = [0]
feat_map = [None] * count_conv3d(self.encoder)
x = patchify(x, patch_size=2)
t = x.shape[2]
iter_ = 1 + (t - 1) // 4
for i in range(iter_):
self._enc_conv_idx = [0]
conv_idx = [0]
if i == 0:
out = self.encoder(
x[:, :, :1, :, :],
feat_cache=self._enc_feat_map,
feat_idx=self._enc_conv_idx,
feat_cache=feat_map,
feat_idx=conv_idx,
)
else:
out_ = self.encoder(
x[:, :, 1 + 4 * (i - 1):1 + 4 * i, :, :],
feat_cache=self._enc_feat_map,
feat_idx=self._enc_conv_idx,
feat_cache=feat_map,
feat_idx=conv_idx,
)
out = torch.cat([out, out_], 2)
mu, log_var = self.conv1(out).chunk(2, dim=1)
self.clear_cache()
return mu
def decode(self, z):
self.clear_cache()
conv_idx = [0]
feat_map = [None] * count_conv3d(self.decoder)
iter_ = z.shape[2]
x = self.conv2(z)
for i in range(iter_):
self._conv_idx = [0]
conv_idx = [0]
if i == 0:
out = self.decoder(
x[:, :, i:i + 1, :, :],
feat_cache=self._feat_map,
feat_idx=self._conv_idx,
feat_cache=feat_map,
feat_idx=conv_idx,
first_chunk=True,
)
else:
out_ = self.decoder(
x[:, :, i:i + 1, :, :],
feat_cache=self._feat_map,
feat_idx=self._conv_idx,
feat_cache=feat_map,
feat_idx=conv_idx,
)
out = torch.cat([out, out_], 2)
out = unpatchify(out, patch_size=2)
self.clear_cache()
return out
def reparameterize(self, mu, log_var):
@ -714,12 +714,3 @@ class WanVAE(nn.Module):
return mu
std = torch.exp(0.5 * log_var.clamp(-30.0, 20.0))
return mu + std * torch.randn_like(std)
def clear_cache(self):
self._conv_num = count_conv3d(self.decoder)
self._conv_idx = [0]
self._feat_map = [None] * self._conv_num
# cache encode
self._enc_conv_num = count_conv3d(self.encoder)
self._enc_conv_idx = [0]
self._enc_feat_map = [None] * self._enc_conv_num

View File

@ -26,11 +26,12 @@ async def cache_control(
"""Cache control middleware that sets appropriate cache headers based on file type and response status"""
response: web.Response = await handler(request)
if (
request.path.endswith(".js")
or request.path.endswith(".css")
or request.path.endswith("index.json")
):
path_filename = request.path.rsplit("/", 1)[-1]
is_entry_point = path_filename.startswith("index") and path_filename.endswith(
".json"
)
if request.path.endswith(".js") or request.path.endswith(".css") or is_entry_point:
response.headers.setdefault("Cache-Control", "no-cache")
return response
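A quick standalone check of the new entry-point detection (the predicate restated as a plain function so it runs on its own; the sample paths are invented):

def is_entry_point(path: str) -> bool:
    # mirrors the middleware above: any "index*.json" filename counts as an entry point
    name = path.rsplit("/", 1)[-1]
    return name.startswith("index") and name.endswith(".json")

assert is_entry_point("/index.json")
assert is_entry_point("/assets/index-D4fRt9.json")        # hashed entry points now match too
assert not is_entry_point("/api/userdata/settings.json")  # other json responses keep their headers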

View File

@ -154,6 +154,7 @@ class BaseModel(torch.nn.Module):
operations = model_config.custom_operations
self.operations = operations
self.diffusion_model = unet_model(**unet_config, device=device, operations=operations)
self.diffusion_model.eval()
if model_management.force_channels_last():
self.diffusion_model.to(memory_format=torch.channels_last)
logger.debug("using channels last mode for diffusion model")
@ -704,7 +705,6 @@ class Lotus(BaseModel):
class StableCascade_C(BaseModel):
def __init__(self, model_config, model_type=ModelType.STABLE_CASCADE, device=None):
super().__init__(model_config, model_type, device=device, unet_model=StageC)
self.diffusion_model.eval().requires_grad_(False)
def extra_conds(self, **kwargs):
out = {}
@ -733,7 +733,6 @@ class StableCascade_C(BaseModel):
class StableCascade_B(BaseModel):
def __init__(self, model_config, model_type=ModelType.STABLE_CASCADE, device=None):
super().__init__(model_config, model_type, device=device, unet_model=StageB)
self.diffusion_model.eval().requires_grad_(False)
def extra_conds(self, **kwargs):
out = {}

View File

@ -221,7 +221,7 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
dit_config["nerf_mlp_ratio"] = 4
dit_config["nerf_depth"] = 4
dit_config["nerf_max_freqs"] = 8
dit_config["nerf_tile_size"] = 32
dit_config["nerf_tile_size"] = 512
dit_config["nerf_final_head_type"] = "conv" if f"{key_prefix}nerf_final_layer_conv.norm.scale" in state_dict_keys else "linear"
dit_config["nerf_embedder_dtype"] = torch.float32
else:
@ -373,8 +373,8 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
dit_config["patch_size"] = 2
dit_config["in_channels"] = 16
dit_config["dim"] = 2304
dit_config["cap_feat_dim"] = 2304
dit_config["n_layers"] = 26
dit_config["cap_feat_dim"] = state_dict['{}cap_embedder.1.weight'.format(key_prefix)].shape[1]
dit_config["n_layers"] = count_blocks(state_dict_keys, '{}layers.'.format(key_prefix) + '{}.')
dit_config["n_heads"] = 24
dit_config["n_kv_heads"] = 8
dit_config["qk_norm"] = True

View File

@ -382,6 +382,7 @@ except:
SUPPORT_FP8_OPS = args.supports_fp8_compute
try:
if is_amd():
torch.backends.cudnn.enabled = False # Seems to improve things a lot on AMD
try:
rocm_version = tuple(map(int, str(torch.version.hip).split(".")[:2]))
except:
@ -394,11 +395,11 @@ try:
if torch_version_numeric >= (2, 7): # works on 2.6 but doesn't actually seem to improve much
if any((a in arch) for a in ["gfx90a", "gfx942", "gfx1100", "gfx1101", "gfx1151"]): # TODO: more arches, TODO: gfx950
ENABLE_PYTORCH_ATTENTION = True
# if torch_version_numeric >= (2, 8):
# if any((a in arch) for a in ["gfx1201"]):
# ENABLE_PYTORCH_ATTENTION = True
if rocm_version >= (7, 0):
if any((a in arch) for a in ["gfx1201"]):
ENABLE_PYTORCH_ATTENTION = True
if torch_version_numeric >= (2, 7) and rocm_version >= (6, 4):
if any((a in arch) for a in ["gfx1200", "gfx1201", "gfx942", "gfx950"]): # TODO: more arches
if any((a in arch) for a in ["gfx1200", "gfx1201", "gfx950"]): # TODO: more arches, "gfx942" gives error on pytorch nightly 2.10 1013 rocm7.0
SUPPORT_FP8_OPS = True
except:
@ -418,6 +419,9 @@ try:
except:
pass
if torch.cuda.is_available() and torch.backends.cudnn.is_available() and PerformanceFeature.AutoTune in args.fast:
torch.backends.cudnn.benchmark = True
try:
if torch_version_numeric >= (2, 5):
torch.backends.cuda.allow_fp16_bf16_reduction_math_sdp(True) # pylint: disable=no-member
@ -1037,11 +1041,7 @@ def vae_dtype(device=None, allowed_dtypes=[]):
if d == torch.float16 and should_use_fp16(device):
return d
# NOTE: bfloat16 seems to work on AMD for the VAE but is extremely slow in some cases compared to fp32
# slowness still a problem on pytorch nightly 2.9.0.dev20250720+rocm6.4 tested on RDNA3
# also a problem on RDNA4 except fp32 is also slow there.
# This is due to large bf16 convolutions being extremely slow.
if d == torch.bfloat16 and ((not is_amd()) or amd_min_version(device, min_rdna_version=4)) and should_use_bf16(device):
if d == torch.bfloat16 and should_use_bf16(device):
return d
return torch.float32

View File

@ -141,17 +141,30 @@ def move_weight_functions(m, device):
class LowVramPatch:
def __init__(self, key, patches):
def __init__(self, key, patches, convert_func=None, set_func=None):
self.key = key
self.patches = patches
self.convert_func = convert_func
self.set_func = set_func
def __call__(self, weight):
intermediate_dtype = weight.dtype
if self.convert_func is not None:
weight = self.convert_func(weight.to(dtype=torch.float32, copy=True), inplace=True)
if intermediate_dtype not in [torch.float32, torch.float16, torch.bfloat16]: # intermediate_dtype has to be one that is supported in math ops
intermediate_dtype = torch.float32
return stochastic_rounding(lora.calculate_weight(self.patches[self.key], weight.to(intermediate_dtype), self.key, intermediate_dtype=intermediate_dtype), weight.dtype, seed=string_to_seed(self.key))
return lora.calculate_weight(self.patches[self.key], weight, self.key, intermediate_dtype=intermediate_dtype)
out = lora.calculate_weight(self.patches[self.key], weight.to(intermediate_dtype), self.key, intermediate_dtype=intermediate_dtype)
if self.set_func is None:
return stochastic_rounding(out, weight.dtype, seed=string_to_seed(self.key))
else:
return self.set_func(out, seed=string_to_seed(self.key), return_weight=True)
out = lora.calculate_weight(self.patches[self.key], weight, self.key, intermediate_dtype=intermediate_dtype)
if self.set_func is not None:
return self.set_func(out, seed=string_to_seed(self.key), return_weight=True).to(dtype=intermediate_dtype)
else:
return out
def get_key_weight(model, key):
set_func = None
@ -746,13 +759,15 @@ class ModelPatcher(ModelManageable, PatchSupport):
if force_patch_weights:
self.patch_weight_to_device(weight_key)
else:
m.weight_function = [LowVramPatch(weight_key, self.patches)]
_, set_func, convert_func = get_key_weight(self.model, weight_key)
m.weight_function = [LowVramPatch(weight_key, self.patches, convert_func, set_func)]
patch_counter += 1
if bias_key in self.patches:
if force_patch_weights:
self.patch_weight_to_device(bias_key)
else:
m.bias_function = [LowVramPatch(bias_key, self.patches)]
_, set_func, convert_func = get_key_weight(self.model, bias_key)
m.bias_function = [LowVramPatch(bias_key, self.patches, convert_func, set_func)]
patch_counter += 1
cast_weight = True
@ -948,10 +963,12 @@ class ModelPatcher(ModelManageable, PatchSupport):
module_mem += move_weight_functions(m, device_to)
if lowvram_possible:
if weight_key in self.patches:
m.weight_function.append(LowVramPatch(weight_key, self.patches))
_, set_func, convert_func = get_key_weight(self.model, weight_key)
m.weight_function.append(LowVramPatch(weight_key, self.patches, convert_func, set_func))
patch_counter += 1
if bias_key in self.patches:
m.bias_function.append(LowVramPatch(bias_key, self.patches))
_, set_func, convert_func = get_key_weight(self.model, bias_key)
m.bias_function.append(LowVramPatch(bias_key, self.patches, convert_func, set_func))
patch_counter += 1
cast_weight = True

View File

@ -26,6 +26,13 @@ def rescale_zero_terminal_snr_sigmas(sigmas):
return ((1 - alphas_bar) / alphas_bar) ** 0.5
def reshape_sigma(sigma, noise_dim):
if sigma.nelement() == 1:
return sigma.view(())
else:
return sigma.view(sigma.shape[:1] + (1,) * (noise_dim - 1))
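A standalone check of the new helper's broadcasting behavior (restated so the snippet runs on its own; only torch assumed):

import torch

def reshape_sigma(sigma, noise_dim):
    # scalar sigmas stay 0-d; batched sigmas get trailing singleton dims for broadcasting
    if sigma.nelement() == 1:
        return sigma.view(())
    return sigma.view(sigma.shape[:1] + (1,) * (noise_dim - 1))

print(reshape_sigma(torch.tensor([0.5, 1.0]), 4).shape)  # torch.Size([2, 1, 1, 1])
print(reshape_sigma(torch.tensor(0.7), 4).shape)         # torch.Size([]), broadcasts over any shape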
class ModelSampling(Protocol):
def calculate_input(self, sigma, noise) -> torch.Tensor:
...
@ -38,15 +45,15 @@ class EPS(ModelSampling):
sigma_data: float
def calculate_input(self, sigma, noise):
sigma = sigma.view(sigma.shape[:1] + (1,) * (noise.ndim - 1))
sigma = reshape_sigma(sigma, noise.ndim)
return noise / (sigma ** 2 + self.sigma_data ** 2) ** 0.5
def calculate_denoised(self, sigma, model_output, model_input):
sigma = sigma.view(sigma.shape[:1] + (1,) * (model_output.ndim - 1))
sigma = reshape_sigma(sigma, model_output.ndim)
return model_input - model_output * sigma
def noise_scaling(self, sigma, noise, latent_image, max_denoise=False):
sigma = sigma.view(sigma.shape[:1] + (1,) * (noise.ndim - 1))
sigma = reshape_sigma(sigma, noise.ndim)
if max_denoise:
noise = noise * torch.sqrt(1.0 + sigma ** 2.0)
else:
@ -61,13 +68,13 @@ class EPS(ModelSampling):
class V_PREDICTION(EPS):
def calculate_denoised(self, sigma, model_output, model_input):
sigma = sigma.view(sigma.shape[:1] + (1,) * (model_output.ndim - 1))
sigma = reshape_sigma(sigma, model_output.ndim)
return model_input * self.sigma_data ** 2 / (sigma ** 2 + self.sigma_data ** 2) - model_output * sigma * self.sigma_data / (sigma ** 2 + self.sigma_data ** 2) ** 0.5
class EDM(V_PREDICTION):
def calculate_denoised(self, sigma, model_output, model_input):
sigma = sigma.view(sigma.shape[:1] + (1,) * (model_output.ndim - 1))
sigma = reshape_sigma(sigma, model_output.ndim)
return model_input * self.sigma_data ** 2 / (sigma ** 2 + self.sigma_data ** 2) + model_output * sigma * self.sigma_data / (sigma ** 2 + self.sigma_data ** 2) ** 0.5
@ -76,15 +83,15 @@ class CONST(ModelSampling):
return noise
def calculate_denoised(self, sigma, model_output, model_input):
sigma = sigma.view(sigma.shape[:1] + (1,) * (model_output.ndim - 1))
sigma = reshape_sigma(sigma, model_output.ndim)
return model_input - model_output * sigma
def noise_scaling(self, sigma, noise, latent_image, max_denoise=False):
sigma = sigma.view(sigma.shape[:1] + (1,) * (noise.ndim - 1))
sigma = reshape_sigma(sigma, noise.ndim)
return sigma * noise + (1.0 - sigma) * latent_image
def inverse_noise_scaling(self, sigma, latent):
sigma = sigma.view(sigma.shape[:1] + (1,) * (latent.ndim - 1))
sigma = reshape_sigma(sigma, latent.ndim)
return latent / (1.0 - sigma)
@ -97,19 +104,20 @@ class IMG_TO_IMG(X0):
def calculate_input(self, sigma, noise):
return noise
class COSMOS_RFLOW:
def calculate_input(self, sigma, noise):
sigma = (sigma / (sigma + 1))
sigma = sigma.view(sigma.shape[:1] + (1,) * (noise.ndim - 1))
sigma = reshape_sigma(sigma, noise.ndim)
return noise * (1.0 - sigma)
def calculate_denoised(self, sigma, model_output, model_input):
sigma = (sigma / (sigma + 1))
sigma = sigma.view(sigma.shape[:1] + (1,) * (model_output.ndim - 1))
sigma = reshape_sigma(sigma, model_output.ndim)
return model_input * (1.0 - sigma) - model_output * sigma
def noise_scaling(self, sigma, noise, latent_image, max_denoise=False):
sigma = sigma.view(sigma.shape[:1] + (1,) * (noise.ndim - 1))
sigma = reshape_sigma(sigma, noise.ndim)
noise = noise * sigma
noise += latent_image
return noise
@ -117,6 +125,7 @@ class COSMOS_RFLOW:
def inverse_noise_scaling(self, sigma, latent):
return latent
class ModelSamplingDiscrete(torch.nn.Module):
def __init__(self, model_config=None, zsnr=None):
super().__init__()

View File

@ -2149,7 +2149,6 @@ NODE_DISPLAY_NAME_MAPPINGS = {
"DiffControlNetLoader": "Load ControlNet Model (diff)",
"StyleModelLoader": "Load Style Model",
"CLIPVisionLoader": "Load CLIP Vision",
"UpscaleModelLoader": "Load Upscale Model",
"UNETLoader": "Load Diffusion Model",
# Conditioning
"CLIPVisionEncode": "CLIP Vision Encode",
@ -2187,7 +2186,6 @@ NODE_DISPLAY_NAME_MAPPINGS = {
"LoadImageOutput": "Load Image (from Outputs)",
"ImageScale": "Upscale Image",
"ImageScaleBy": "Upscale Image By",
"ImageUpscaleWithModel": "Upscale Image (using Model)",
"ImageInvert": "Invert Image",
"ImagePadForOutpaint": "Pad Image for Outpainting",
"ImageBatch": "Batch Images",

View File

@ -22,12 +22,21 @@ from torch import Tensor
from typing import Optional, Type, Union
from . import model_management, rmsnorm
from .interruption import throw_exception_if_processing_interrupted
from .cli_args import args, PerformanceFeature
from .execution_context import current_execution_context
from .float import stochastic_rounding
logger = logging.getLogger(__name__)
def run_every_op():
if torch.compiler.is_compiling():
return
throw_exception_if_processing_interrupted()
scaled_dot_product_attention = None
@ -66,16 +75,24 @@ except Exception as exc_info:
logger.debug("Could not set sdpa backend priority.", exc_info=exc_info)
scaled_dot_product_attention = _scaled_dot_product_attention
NVIDIA_MEMORY_CONV_BUG_WORKAROUND = False
try:
if comfy.model_management.is_nvidia():
if torch.backends.cudnn.version() >= 91002 and comfy.model_management.torch_version_numeric >= (2, 9) and comfy.model_management.torch_version_numeric <= (2, 10):
# TODO: change upper bound version once it's fixed
NVIDIA_MEMORY_CONV_BUG_WORKAROUND = True
logging.info("working around nvidia conv3d memory bug.")
except:
pass
cast_to = model_management.cast_to # TODO: remove once no more references
if torch.cuda.is_available() and torch.backends.cudnn.is_available() and PerformanceFeature.AutoTune in args.fast:
torch.backends.cudnn.benchmark = True
def cast_to_input(weight, input, non_blocking=False, copy=True):
return model_management.cast_to(weight, input.dtype, input.device, non_blocking=non_blocking, copy=copy)
@torch.compiler.disable()
def cast_bias_weight(s, input=None, dtype=None, device=None, bias_dtype=None):
if input is not None:
if dtype is None:
@ -174,6 +191,7 @@ class disable_weight_init:
return torch.nn.functional.linear(input, weight, bias)
def forward(self, *args, **kwargs):
run_every_op()
if self.comfy_cast_weights or len(self.weight_function) > 0 or len(self.bias_function) > 0:
return self.forward_comfy_cast_weights(*args, **kwargs)
else:
@ -188,6 +206,7 @@ class disable_weight_init:
return self._conv_forward(input, weight, bias)
def forward(self, *args, **kwargs):
run_every_op()
if self.comfy_cast_weights or len(self.weight_function) > 0 or len(self.bias_function) > 0:
return self.forward_comfy_cast_weights(*args, **kwargs)
else:
@ -202,6 +221,7 @@ class disable_weight_init:
return self._conv_forward(input, weight, bias)
def forward(self, *args, **kwargs):
run_every_op()
if self.comfy_cast_weights or len(self.weight_function) > 0 or len(self.bias_function) > 0:
return self.forward_comfy_cast_weights(*args, **kwargs)
else:
@ -211,11 +231,21 @@ class disable_weight_init:
def reset_parameters(self):
return None
def _conv_forward(self, input, weight, bias, *args, **kwargs):
if NVIDIA_MEMORY_CONV_BUG_WORKAROUND and weight.dtype in (torch.float16, torch.bfloat16):
out = torch.cudnn_convolution(input, weight, self.padding, self.stride, self.dilation, self.groups, benchmark=False, deterministic=False, allow_tf32=True)
if bias is not None:
out += bias.reshape((1, -1) + (1,) * (out.ndim - 2))
return out
else:
return super()._conv_forward(input, weight, bias, *args, **kwargs)
def forward_comfy_cast_weights(self, input):
weight, bias = cast_bias_weight(self, input)
return self._conv_forward(input, weight, bias)
def forward(self, *args, **kwargs):
run_every_op()
if self.comfy_cast_weights or len(self.weight_function) > 0 or len(self.bias_function) > 0:
return self.forward_comfy_cast_weights(*args, **kwargs)
else:
@ -230,6 +260,7 @@ class disable_weight_init:
return torch.nn.functional.group_norm(input, self.num_groups, weight, bias, self.eps)
def forward(self, *args, **kwargs):
run_every_op()
if self.comfy_cast_weights or len(self.weight_function) > 0 or len(self.bias_function) > 0:
return self.forward_comfy_cast_weights(*args, **kwargs)
else:
@ -248,6 +279,7 @@ class disable_weight_init:
return torch.nn.functional.layer_norm(input, self.normalized_shape, weight, bias, self.eps)
def forward(self, *args, **kwargs):
run_every_op()
if self.comfy_cast_weights or len(self.weight_function) > 0 or len(self.bias_function) > 0:
return self.forward_comfy_cast_weights(*args, **kwargs)
else:
@ -267,6 +299,7 @@ class disable_weight_init:
# return torch.nn.functional.rms_norm(input, self.normalized_shape, weight, self.eps)
def forward(self, *args, **kwargs):
run_every_op()
if self.comfy_cast_weights or len(self.weight_function) > 0 or len(self.bias_function) > 0:
return self.forward_comfy_cast_weights(*args, **kwargs)
else:
@ -288,6 +321,7 @@ class disable_weight_init:
output_padding, self.groups, self.dilation)
def forward(self, *args, **kwargs):
run_every_op()
if self.comfy_cast_weights or len(self.weight_function) > 0 or len(self.bias_function) > 0:
return self.forward_comfy_cast_weights(*args, **kwargs)
else:
@ -309,6 +343,7 @@ class disable_weight_init:
output_padding, self.groups, self.dilation)
def forward(self, *args, **kwargs):
run_every_op()
if self.comfy_cast_weights or len(self.weight_function) > 0 or len(self.bias_function) > 0:
return self.forward_comfy_cast_weights(*args, **kwargs)
else:
@ -327,6 +362,7 @@ class disable_weight_init:
return torch.nn.functional.embedding(input, weight, self.padding_idx, self.max_norm, self.norm_type, self.scale_grad_by_freq, self.sparse).to(dtype=output_dtype)
def forward(self, *args, **kwargs):
run_every_op()
if self.comfy_cast_weights or len(self.weight_function) > 0 or len(self.bias_function) > 0:
return self.forward_comfy_cast_weights(*args, **kwargs)
else:
@ -488,8 +524,10 @@ def scaled_fp8_ops(fp8_matrix_mult=False, scale_input=False, override_dtype=None
else:
return weight * self.scale_weight.to(device=weight.device, dtype=weight.dtype)
def set_weight(self, weight, inplace_update=False, seed=None, **kwargs):
def set_weight(self, weight, inplace_update=False, seed=None, return_weight=False, **kwargs):
weight = stochastic_rounding(weight / self.scale_weight.to(device=weight.device, dtype=weight.dtype), self.weight.dtype, seed=seed)
if return_weight:
return weight
if inplace_update:
self.weight.data.copy_(weight)
else:

View File

@ -150,7 +150,7 @@ def merge_nested_dicts(dict1: dict, dict2: dict, copy_dict1=True):
for key, value in dict2.items():
if isinstance(value, dict):
curr_value = merged_dict.setdefault(key, {})
merged_dict[key] = merge_nested_dicts(value, curr_value)
merged_dict[key] = merge_nested_dicts(curr_value, value)
elif isinstance(value, list):
merged_dict.setdefault(key, []).extend(value)
else:
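For reference, a sketch of what the corrected argument order produces (merge_nested_dicts assumed importable from comfy.patcher_extension; the sample dicts are invented):

d1 = {"patches": {"attn": [1]}}
d2 = {"patches": {"attn": [2], "mlp": [3]}}
merged = merge_nested_dicts(d1, d2)
# nested dicts are merged recursively and lists from both sides are concatenated:
# {"patches": {"attn": [1, 2], "mlp": [3]}}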

View File

@ -324,17 +324,10 @@ def _calc_cond_batch(model: BaseModel, conds, x_in: torch.Tensor, timestep: torc
copy_dict1=False)
if patches is not None:
# TODO: replace with merge_nested_dicts function
if "patches" in transformer_options:
cur_patches = transformer_options["patches"].copy()
for p in patches:
if p in cur_patches:
cur_patches[p] = cur_patches[p] + patches[p]
else:
cur_patches[p] = patches[p]
transformer_options["patches"] = cur_patches
else:
transformer_options["patches"] = patches
transformer_options["patches"] = comfy.patcher_extension.merge_nested_dicts(
transformer_options.get("patches", {}),
patches
)
transformer_options["cond_or_uncond"] = cond_or_uncond[:]
transformer_options["uuids"] = uuids[:]
@ -382,7 +375,7 @@ def cfg_function(model, cond_pred, uncond_pred, cond_scale, x, timestep, model_o
model_options = {}
if "sampler_cfg_function" in model_options:
args = {"cond": x - cond_pred, "uncond": x - uncond_pred, "cond_scale": cond_scale, "timestep": timestep, "input": x, "sigma": timestep,
"cond_denoised": cond_pred, "uncond_denoised": uncond_pred, "model": model, "model_options": model_options}
"cond_denoised": cond_pred, "uncond_denoised": uncond_pred, "model": model, "model_options": model_options, "input_cond": cond, "input_uncond": uncond}
cfg_result = x - model_options["sampler_cfg_function"](args)
else:
cfg_result = uncond_pred + (cond_pred - uncond_pred) * cond_scale

View File

@ -35,6 +35,7 @@ from .ldm.genmo.vae import model as genmo_model
from .ldm.hunyuan3d.vae import ShapeVAE
from .ldm.lightricks.vae import causal_video_autoencoder as lightricks
from .ldm.models.autoencoder import AutoencoderKL, AutoencodingEngine
from .ldm.mmaudio.vae.autoencoder import AudioAutoencoder
from .ldm.wan import vae as wan_vae
from .ldm.wan import vae2_2 as wan_vae2_2
from .lora import load_lora, model_lora_keys_unet, model_lora_keys_clip
@ -292,8 +293,12 @@ class VAE:
if 'decoder.up_blocks.0.resnets.0.norm1.weight' in sd.keys(): # diffusers format
sd = diffusers_convert.convert_vae_state_dict(sd)
self.memory_used_encode = lambda shape, dtype: (1767 * shape[2] * shape[3]) * model_management.dtype_size(dtype) # These are for AutoencoderKL and need tweaking (should be lower)
self.memory_used_decode = lambda shape, dtype: (2178 * shape[2] * shape[3] * 64) * model_management.dtype_size(dtype)
if model_management.is_amd():
VAE_KL_MEM_RATIO = 2.73
else:
VAE_KL_MEM_RATIO = 1.0
self.memory_used_encode = lambda shape, dtype: (1767 * shape[2] * shape[3]) * model_management.dtype_size(dtype) * VAE_KL_MEM_RATIO # These are for AutoencoderKL and need tweaking (should be lower)
self.memory_used_decode = lambda shape, dtype: (2178 * shape[2] * shape[3] * 64) * model_management.dtype_size(dtype) * VAE_KL_MEM_RATIO
self.downscale_ratio = 8
self.upscale_ratio = 8
self.latent_channels = 4
@ -308,6 +313,7 @@ class VAE:
self.downscale_index_formula = None
self.upscale_index_formula = None
self.extra_1d_channel = None
self.crop_input = True
if config is None:
if "decoder.mid.block_1.mix_factor" in sd:
@ -349,35 +355,51 @@ class VAE:
self.first_stage_model = StageC_coder()
self.downscale_ratio = 32
self.latent_channels = 16
elif "decoder.conv_in.weight" in sd and sd['decoder.conv_in.weight'].shape[1] == 64:
ddconfig = {"block_out_channels": [128, 256, 512, 512, 1024, 1024], "in_channels": 3, "out_channels": 3, "num_res_blocks": 2, "ffactor_spatial": 32, "downsample_match_channel": True, "upsample_match_channel": True}
self.latent_channels = ddconfig['z_channels'] = sd["decoder.conv_in.weight"].shape[1]
self.downscale_ratio = 32
self.upscale_ratio = 32
self.working_dtypes = [torch.float16, torch.bfloat16, torch.float32]
self.first_stage_model = AutoencodingEngine(regularizer_config={'target': "comfy.ldm.models.autoencoder.DiagonalGaussianRegularizer"},
encoder_config={'target': "comfy.ldm.hunyuan_video.vae.Encoder", 'params': ddconfig},
decoder_config={'target': "comfy.ldm.hunyuan_video.vae.Decoder", 'params': ddconfig})
self.memory_used_encode = lambda shape, dtype: (700 * shape[2] * shape[3]) * model_management.dtype_size(dtype)
self.memory_used_decode = lambda shape, dtype: (700 * shape[2] * shape[3] * 32 * 32) * model_management.dtype_size(dtype)
elif "decoder.conv_in.weight" in sd:
# default SD1.x/SD2.x VAE parameters
ddconfig = {'double_z': True, 'z_channels': 4, 'resolution': 256, 'in_channels': 3, 'out_ch': 3, 'ch': 128, 'ch_mult': [1, 2, 4, 4], 'num_res_blocks': 2, 'attn_resolutions': [], 'dropout': 0.0}
if 'encoder.down.2.downsample.conv.weight' not in sd and 'decoder.up.3.upsample.conv.weight' not in sd: # Stable diffusion x4 upscaler VAE
ddconfig['ch_mult'] = [1, 2, 4]
self.downscale_ratio = 4
self.upscale_ratio = 4
self.latent_channels = ddconfig['z_channels'] = sd["decoder.conv_in.weight"].shape[1]
if 'post_quant_conv.weight' in sd:
self.first_stage_model = AutoencoderKL(ddconfig=ddconfig, embed_dim=sd['post_quant_conv.weight'].shape[1])
else:
if sd['decoder.conv_in.weight'].shape[1] == 64:
ddconfig = {"block_out_channels": [128, 256, 512, 512, 1024, 1024], "in_channels": 3, "out_channels": 3, "num_res_blocks": 2, "ffactor_spatial": 32, "downsample_match_channel": True, "upsample_match_channel": True}
self.latent_channels = ddconfig['z_channels'] = sd["decoder.conv_in.weight"].shape[1]
self.downscale_ratio = 32
self.upscale_ratio = 32
self.working_dtypes = [torch.float16, torch.bfloat16, torch.float32]
self.first_stage_model = AutoencodingEngine(regularizer_config={'target': "comfy.ldm.models.autoencoder.DiagonalGaussianRegularizer"},
encoder_config={'target': "comfy.ldm.modules.diffusionmodules.model.Encoder", 'params': ddconfig},
decoder_config={'target': "comfy.ldm.modules.diffusionmodules.model.Decoder", 'params': ddconfig})
encoder_config={'target': "comfy.ldm.hunyuan_video.vae.Encoder", 'params': ddconfig},
decoder_config={'target': "comfy.ldm.hunyuan_video.vae.Decoder", 'params': ddconfig})
self.memory_used_encode = lambda shape, dtype: (700 * shape[2] * shape[3]) * model_management.dtype_size(dtype)
self.memory_used_decode = lambda shape, dtype: (700 * shape[2] * shape[3] * 32 * 32) * model_management.dtype_size(dtype)
elif sd['decoder.conv_in.weight'].shape[1] == 32:
ddconfig = {"block_out_channels": [128, 256, 512, 1024, 1024], "in_channels": 3, "out_channels": 3, "num_res_blocks": 2, "ffactor_spatial": 16, "ffactor_temporal": 4, "downsample_match_channel": True, "upsample_match_channel": True, "refiner_vae": False}
self.latent_channels = ddconfig['z_channels'] = sd["decoder.conv_in.weight"].shape[1]
self.working_dtypes = [torch.float16, torch.bfloat16, torch.float32]
self.upscale_ratio = (lambda a: max(0, a * 4 - 3), 16, 16)
self.upscale_index_formula = (4, 16, 16)
self.downscale_ratio = (lambda a: max(0, math.floor((a + 3) / 4)), 16, 16)
self.downscale_index_formula = (4, 16, 16)
self.latent_dim = 3
self.not_video = True
self.first_stage_model = AutoencodingEngine(regularizer_config={'target': "comfy.ldm.models.autoencoder.DiagonalGaussianRegularizer"},
encoder_config={'target': "comfy.ldm.hunyuan_video.vae_refiner.Encoder", 'params': ddconfig},
decoder_config={'target': "comfy.ldm.hunyuan_video.vae_refiner.Decoder", 'params': ddconfig})
self.memory_used_encode = lambda shape, dtype: (2800 * shape[-2] * shape[-1]) * model_management.dtype_size(dtype)
self.memory_used_decode = lambda shape, dtype: (2800 * shape[-3] * shape[-2] * shape[-1] * 16 * 16) * model_management.dtype_size(dtype)
else:
# default SD1.x/SD2.x VAE parameters
ddconfig = {'double_z': True, 'z_channels': 4, 'resolution': 256, 'in_channels': 3, 'out_ch': 3, 'ch': 128, 'ch_mult': [1, 2, 4, 4], 'num_res_blocks': 2, 'attn_resolutions': [], 'dropout': 0.0}
if 'encoder.down.2.downsample.conv.weight' not in sd and 'decoder.up.3.upsample.conv.weight' not in sd: # Stable diffusion x4 upscaler VAE
ddconfig['ch_mult'] = [1, 2, 4]
self.downscale_ratio = 4
self.upscale_ratio = 4
self.latent_channels = ddconfig['z_channels'] = sd["decoder.conv_in.weight"].shape[1]
if 'post_quant_conv.weight' in sd:
self.first_stage_model = AutoencoderKL(ddconfig=ddconfig, embed_dim=sd['post_quant_conv.weight'].shape[1])
else:
self.first_stage_model = AutoencodingEngine(regularizer_config={'target': "comfy.ldm.models.autoencoder.DiagonalGaussianRegularizer"},
encoder_config={'target': "comfy.ldm.modules.diffusionmodules.model.Encoder", 'params': ddconfig},
decoder_config={'target': "comfy.ldm.modules.diffusionmodules.model.Decoder", 'params': ddconfig})
elif "decoder.layers.1.layers.0.beta" in sd:
self.first_stage_model = AudioOobleckVAE()
self.memory_used_encode = lambda shape, dtype: (1000 * shape[2]) * model_management.dtype_size(dtype)
@ -551,6 +573,25 @@ class VAE:
self.latent_channels = 3
self.latent_dim = 2
self.output_channels = 3
elif "vocoder.activation_post.downsample.lowpass.filter" in sd: # MMAudio VAE
sample_rate = 16000
if sample_rate == 16000:
mode = '16k'
else:
mode = '44k'
self.first_stage_model = AudioAutoencoder(mode=mode)
self.memory_used_encode = lambda shape, dtype: (30 * shape[2]) * model_management.dtype_size(dtype)
self.memory_used_decode = lambda shape, dtype: (90 * shape[2] * 1411.2) * model_management.dtype_size(dtype)
self.latent_channels = 20
self.output_channels = 2
self.upscale_ratio = 512 * (44100 / sample_rate)
self.downscale_ratio = 512 * (44100 / sample_rate)
self.latent_dim = 1
self.process_output = lambda audio: audio
self.process_input = lambda audio: audio
self.working_dtypes = [torch.float32]
self.crop_input = False
else:
logger.warning("WARNING: No VAE weights detected, VAE not initalized.")
self.first_stage_model = None
@ -607,6 +648,9 @@ class VAE:
raise RuntimeError("ERROR: VAE is invalid: None\n\nIf the VAE is from a checkpoint loader node your checkpoint does not contain a valid VAE.")
def vae_encode_crop_pixels(self, pixels):
if not self.crop_input:
return pixels
downscale_ratio = self.spacial_compression_encode()
dims = pixels.shape[1:-1]
@ -684,6 +728,7 @@ class VAE:
def decode(self, samples_in, vae_options={}):
self.throw_exception_if_invalid()
pixel_samples = None
do_tile = False
try:
memory_used = self.memory_used_decode(samples_in.shape, self.vae_dtype)
model_management.load_models_gpu([self.patcher], memory_required=memory_used, force_full_load=self.disable_offload)
@ -699,6 +744,13 @@ class VAE:
pixel_samples[x:x + batch_number] = out
except model_management.OOM_EXCEPTION:
logger.warning("Warning: Ran out of memory when regular VAE decoding, retrying with tiled VAE decoding.")
# NOTE: We don't know what tensors were allocated to stack variables at the time of the
# exception and the exception itself refs them all until we get out of this except block.
# So we just set a flag for tiler fallback so that tensor gc can happen once the
# exception is fully off the books.
do_tile = True
if do_tile:
dims = samples_in.ndim - 2
if dims == 1 or self.extra_1d_channel is not None:
pixel_samples = self.decode_tiled_1d(samples_in)
@ -747,6 +799,7 @@ class VAE:
self.throw_exception_if_invalid()
pixel_samples = self.vae_encode_crop_pixels(pixel_samples)
pixel_samples = pixel_samples.movedim(-1, 1)
do_tile = False
if self.latent_dim == 3 and pixel_samples.ndim < 5:
if not self.not_video:
pixel_samples = pixel_samples.movedim(1, 0).unsqueeze(0)
@ -768,6 +821,13 @@ class VAE:
except model_management.OOM_EXCEPTION:
logger.warning("Warning: Ran out of memory when regular VAE encoding, retrying with tiled VAE encoding.")
# NOTE: We don't know what tensors were allocated to stack variables at the time of the
# exception and the exception itself refs them all until we get out of this except block.
# So we just set a flag for tiler fallback so that tensor gc can happen once the
# exception is fully off the books.
do_tile = True
if do_tile:
if self.latent_dim == 3:
tile = 256
overlap = tile // 4
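The NOTE comments above describe a deliberate pattern: the tiled fallback does not run inside the except block, because the in-flight exception keeps the failed frame's locals, including their GPU tensors, alive until the handler exits. A minimal standalone sketch of that pattern (function names invented, only torch assumed):

import torch

def decode_with_fallback(decode_full, decode_tiled, latent):
    do_tile = False
    try:
        return decode_full(latent)
    except torch.cuda.OutOfMemoryError:
        # only set a flag here; retrying inside this block would hold on to the
        # exception and every tensor it still references while the retry allocates
        do_tile = True
    if do_tile:
        return decode_tiled(latent)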
@ -930,6 +990,7 @@ class TEModel(Enum):
QWEN25_3B = 10
QWEN25_7B = 11
BYT5_SMALL_GLYPH = 12
GEMMA_3_4B = 13
def detect_te_model(sd):
@ -953,6 +1014,8 @@ def detect_te_model(sd):
return TEModel.BYT5_SMALL_GLYPH
return TEModel.T5_BASE
if 'model.layers.0.post_feedforward_layernorm.weight' in sd:
if 'model.layers.0.self_attn.q_norm.weight' in sd:
return TEModel.GEMMA_3_4B
return TEModel.GEMMA_2_2B
if 'model.layers.0.self_attn.k_proj.bias' in sd:
weight = sd['model.layers.0.self_attn.k_proj.bias']
@ -1059,6 +1122,10 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip
clip_target.clip = lumina2.te(**llama_detect(clip_data))
clip_target.tokenizer = lumina2.LuminaTokenizer
tokenizer_data["spiece_model"] = clip_data[0].get("spiece_model", None)
elif te_model == TEModel.GEMMA_3_4B:
clip_target.clip = lumina2.te(**llama_detect(clip_data), model_type="gemma3_4b")
clip_target.tokenizer = lumina2.NTokenizer
tokenizer_data["spiece_model"] = clip_data[0].get("spiece_model", None)
elif te_model == TEModel.LLAMA3_8:
clip_target.clip = hidream.hidream_clip(**llama_detect(clip_data),
clip_l=False, clip_g=False, t5=False, llama=True, dtype_t5=None, t5xxl_scaled_fp8=None)

View File

@ -79,7 +79,13 @@ class HunyuanImageTEModel(QwenImageTEModel):
self.byt5_small = None
def encode_token_weights(self, token_weight_pairs):
cond, p, extra = super().encode_token_weights(token_weight_pairs)
tok_pairs = token_weight_pairs["qwen25_7b"][0]
template_end = -1
if tok_pairs[0][0] == 27:
if len(tok_pairs) > 36: # refiner prompt uses a fixed 36 template_end
template_end = 36
cond, p, extra = super().encode_token_weights(token_weight_pairs, template_end=template_end)
if self.byt5_small is not None and "byt5" in token_weight_pairs:
out = self.byt5_small.encode_token_weights(token_weight_pairs["byt5"])
extra["conditioning_byt5small"] = out[0]

View File

@ -4,11 +4,14 @@ from typing import Optional, Any
import torch
import torch.nn as nn
import logging
from . import qwen_vl
from ..ldm.common_dit import rms_norm
from ..ldm.modules.attention import optimized_attention_for_device
logger = logging.getLogger(__name__)
@dataclass
class Llama2Config:
@ -27,6 +30,9 @@ class Llama2Config:
mlp_activation = "silu"
qkv_bias = False
rope_dims = None
q_norm = None
k_norm = None
rope_scale = None
@dataclass
@ -46,6 +52,9 @@ class Qwen25_3BConfig:
mlp_activation = "silu"
qkv_bias = True
rope_dims = None
q_norm = None
k_norm = None
rope_scale = None
@dataclass
@ -65,6 +74,9 @@ class Qwen25_7BVLI_Config:
mlp_activation = "silu"
qkv_bias = True
rope_dims = [16, 24, 24]
q_norm = None
k_norm = None
rope_scale = None
@dataclass
@ -84,6 +96,33 @@ class Gemma2_2B_Config:
mlp_activation = "gelu_pytorch_tanh"
qkv_bias = False
rope_dims = None
q_norm = None
k_norm = None
sliding_attention = None
rope_scale = None
@dataclass
class Gemma3_4B_Config:
vocab_size: int = 262208
hidden_size: int = 2560
intermediate_size: int = 10240
num_hidden_layers: int = 34
num_attention_heads: int = 8
num_key_value_heads: int = 4
max_position_embeddings: int = 131072
rms_norm_eps: float = 1e-6
rope_theta = [10000.0, 1000000.0]
transformer_type: str = "gemma3"
head_dim = 256
rms_norm_add = True
mlp_activation = "gelu_pytorch_tanh"
qkv_bias = False
rope_dims = None
q_norm = "gemma3"
k_norm = "gemma3"
sliding_attention = [False, False, False, False, False, 1024]
rope_scale = [1.0, 8.0]
class RMSNorm(nn.Module):
@ -108,25 +147,40 @@ def rotate_half(x):
return torch.cat((-x2, x1), dim=-1)
def precompute_freqs_cis(head_dim, position_ids, theta, rope_dims=None, device=None):
theta_numerator = torch.arange(0, head_dim, 2, device=device).float()
inv_freq = 1.0 / (theta ** (theta_numerator / head_dim))
def precompute_freqs_cis(head_dim, position_ids, theta, rope_scale=None, rope_dims=None, device=None):
if not isinstance(theta, list):
theta = [theta]
inv_freq_expanded = inv_freq[None, :, None].float().expand(position_ids.shape[0], -1, 1)
position_ids_expanded = position_ids[:, None, :].float()
freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2)
emb = torch.cat((freqs, freqs), dim=-1)
cos = emb.cos()
sin = emb.sin()
if rope_dims is not None and position_ids.shape[0] > 1:
mrope_section = rope_dims * 2
cos = torch.cat([m[i % 3] for i, m in enumerate(cos.split(mrope_section, dim=-1))], dim=-1).unsqueeze(0)
sin = torch.cat([m[i % 3] for i, m in enumerate(sin.split(mrope_section, dim=-1))], dim=-1).unsqueeze(0)
else:
cos = cos.unsqueeze(1)
sin = sin.unsqueeze(1)
out = []
for index, t in enumerate(theta):
theta_numerator = torch.arange(0, head_dim, 2, device=device).float()
inv_freq = 1.0 / (t ** (theta_numerator / head_dim))
return (cos, sin)
if rope_scale is not None:
if isinstance(rope_scale, list):
inv_freq /= rope_scale[index]
else:
inv_freq /= rope_scale
inv_freq_expanded = inv_freq[None, :, None].float().expand(position_ids.shape[0], -1, 1)
position_ids_expanded = position_ids[:, None, :].float()
freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2)
emb = torch.cat((freqs, freqs), dim=-1)
cos = emb.cos()
sin = emb.sin()
if rope_dims is not None and position_ids.shape[0] > 1:
mrope_section = rope_dims * 2
cos = torch.cat([m[i % 3] for i, m in enumerate(cos.split(mrope_section, dim=-1))], dim=-1).unsqueeze(0)
sin = torch.cat([m[i % 3] for i, m in enumerate(sin.split(mrope_section, dim=-1))], dim=-1).unsqueeze(0)
else:
cos = cos.unsqueeze(1)
sin = sin.unsqueeze(1)
out.append((cos, sin))
if len(out) == 1:
return out[0]
return out
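Sketch of how the reworked helper is called for a Gemma 3 style config (assumes the precompute_freqs_cis above is in scope and torch is imported, as it is in this module): a list of thetas plus per-theta rope_scale factors yields one (cos, sin) pair per theta, and TransformerBlockGemma2 later picks index 0 or 1 per layer depending on its sliding-attention setting.

position_ids = torch.arange(0, 8).unsqueeze(0)            # (batch=1, seq_len=8)
freqs = precompute_freqs_cis(head_dim=256,
                             position_ids=position_ids,
                             theta=[10000.0, 1000000.0],  # Gemma3_4B_Config.rope_theta
                             rope_scale=[1.0, 8.0])       # Gemma3_4B_Config.rope_scale
(cos0, sin0), (cos1, sin1) = freqs                        # one rotary table per theta entry
print(cos0.shape)                                         # torch.Size([1, 1, 8, 256])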
def apply_rope(xq, xk, freqs_cis):
@ -154,6 +208,14 @@ class Attention(nn.Module):
self.v_proj = ops.Linear(config.hidden_size, self.num_kv_heads * self.head_dim, bias=config.qkv_bias, device=device, dtype=dtype)
self.o_proj = ops.Linear(self.inner_size, config.hidden_size, bias=False, device=device, dtype=dtype)
self.q_norm = None
self.k_norm = None
if config.q_norm == "gemma3":
self.q_norm = RMSNorm(self.head_dim, eps=config.rms_norm_eps, add=config.rms_norm_add, device=device, dtype=dtype)
if config.k_norm == "gemma3":
self.k_norm = RMSNorm(self.head_dim, eps=config.rms_norm_eps, add=config.rms_norm_add, device=device, dtype=dtype)
def forward(
self,
hidden_states: torch.Tensor,
@ -170,6 +232,11 @@ class Attention(nn.Module):
xk = xk.view(batch_size, seq_length, self.num_kv_heads, self.head_dim).transpose(1, 2)
xv = xv.view(batch_size, seq_length, self.num_kv_heads, self.head_dim).transpose(1, 2)
if self.q_norm is not None:
xq = self.q_norm(xq)
if self.k_norm is not None:
xk = self.k_norm(xk)
xq, xk = apply_rope(xq, xk, freqs_cis=freqs_cis)
xk = xk.repeat_interleave(self.num_heads // self.num_kv_heads, dim=1)
@ -196,7 +263,7 @@ class MLP(nn.Module):
class TransformerBlock(nn.Module):
def __init__(self, config: Llama2Config, device=None, dtype=None, ops: Any = None):
def __init__(self, config: Llama2Config, index, device=None, dtype=None, ops: Any = None):
super().__init__()
self.self_attn = Attention(config, device=device, dtype=dtype, ops=ops)
self.mlp = MLP(config, device=device, dtype=dtype, ops=ops)
@ -231,7 +298,7 @@ class TransformerBlock(nn.Module):
class TransformerBlockGemma2(nn.Module):
def __init__(self, config: Llama2Config, device=None, dtype=None, ops: Any = None):
def __init__(self, config: Llama2Config, index, device=None, dtype=None, ops: Any = None):
super().__init__()
self.self_attn = Attention(config, device=device, dtype=dtype, ops=ops)
self.mlp = MLP(config, device=device, dtype=dtype, ops=ops)
@ -240,6 +307,13 @@ class TransformerBlockGemma2(nn.Module):
self.pre_feedforward_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps, add=config.rms_norm_add, device=device, dtype=dtype)
self.post_feedforward_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps, add=config.rms_norm_add, device=device, dtype=dtype)
if config.sliding_attention is not None: # TODO: implement. (Not that necessary since models are trained on less than 1024 tokens)
self.sliding_attention = config.sliding_attention[index % len(config.sliding_attention)]
else:
self.sliding_attention = False
self.transformer_type = config.transformer_type
def forward(
self,
x: torch.Tensor,
@ -247,6 +321,14 @@ class TransformerBlockGemma2(nn.Module):
freqs_cis: Optional[torch.Tensor] = None,
optimized_attention=None,
):
if self.transformer_type == 'gemma3':
if self.sliding_attention:
if x.shape[1] > self.sliding_attention:
logger.warning("Warning: sliding attention not implemented, results may be incorrect")
freqs_cis = freqs_cis[1]
else:
freqs_cis = freqs_cis[0]
# Self Attention
residual = x
x = self.input_layernorm(x)
@ -282,7 +364,7 @@ class Llama2_(nn.Module):
device=device,
dtype=dtype
)
if self.config.transformer_type == "gemma2":
if self.config.transformer_type == "gemma2" or self.config.transformer_type == "gemma3":
transformer = TransformerBlockGemma2
self.normalize_in = True
else:
@ -290,8 +372,8 @@ class Llama2_(nn.Module):
self.normalize_in = False
self.layers = nn.ModuleList([
transformer(config, device=device, dtype=dtype, ops=ops)
for _ in range(config.num_hidden_layers)
transformer(config, index=i, device=device, dtype=dtype, ops=ops)
for i in range(config.num_hidden_layers)
])
self.norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps, add=config.rms_norm_add, device=device, dtype=dtype)
# self.lm_head = ops.Linear(config.hidden_size, config.vocab_size, bias=False, device=device, dtype=dtype)
@ -311,6 +393,7 @@ class Llama2_(nn.Module):
freqs_cis = precompute_freqs_cis(self.config.head_dim,
position_ids,
self.config.rope_theta,
self.config.rope_scale,
self.config.rope_dims,
device=x.device)
@ -436,6 +519,7 @@ class Qwen25_7BVLI(BaseLlama, torch.nn.Module):
return super().forward(x, attention_mask=attention_mask, embeds=embeds, num_tokens=num_tokens, intermediate_output=intermediate_output, final_layer_norm_intermediate=final_layer_norm_intermediate, dtype=dtype, position_ids=position_ids)
class Gemma2_2B(BaseLlama, torch.nn.Module):
def __init__(self, config_dict, dtype, device, operations):
super().__init__()
@ -444,3 +528,13 @@ class Gemma2_2B(BaseLlama, torch.nn.Module):
self.model = Llama2_(config, device=device, dtype=dtype, ops=operations)
self.dtype = dtype
class Gemma3_4B(BaseLlama, torch.nn.Module):
def __init__(self, config_dict, dtype, device, operations):
super().__init__()
config = Gemma3_4B_Config(**config_dict)
self.num_layers = config.num_hidden_layers
self.model = Llama2_(config, device=device, dtype=dtype, ops=operations)
self.dtype = dtype

View File

@ -13,6 +13,13 @@ class Gemma2BTokenizer(sd1_clip.SDTokenizer):
def state_dict(self):
return {"spiece_model": self.tokenizer.serialize_model()}
class Gemma3_4BTokenizer(sd1_clip.SDTokenizer):
def __init__(self, embedding_directory=None, tokenizer_data={}):
tokenizer = tokenizer_data.get("spiece_model", None)
super().__init__(tokenizer, pad_with_end=False, embedding_size=2560, embedding_key='gemma3_4b', tokenizer_class=SPieceTokenizer, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=1, tokenizer_args={"add_bos": True, "add_eos": False}, tokenizer_data=tokenizer_data)
def state_dict(self):
return {"spiece_model": self.tokenizer.serialize_model()}
class LuminaTokenizer(sd1_clip.SD1Tokenizer):
def __init__(self, embedding_directory=None, tokenizer_data=None):
@ -20,6 +27,9 @@ class LuminaTokenizer(sd1_clip.SD1Tokenizer):
tokenizer_data = {}
super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, name="gemma2_2b", tokenizer=Gemma2BTokenizer)
class NTokenizer(sd1_clip.SD1Tokenizer):
def __init__(self, embedding_directory=None, tokenizer_data={}):
super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, name="gemma3_4b", tokenizer=Gemma3_4BTokenizer)
class Gemma2_2BModel(sd1_clip.SDClipModel):
def __init__(self, device="cpu", layer="hidden", layer_idx=-2, dtype=None, attention_mask=True, model_options=None, textmodel_json_config=None):
@ -28,16 +38,23 @@ class Gemma2_2BModel(sd1_clip.SDClipModel):
textmodel_json_config = textmodel_json_config or {}
super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, special_tokens={"start": 2, "pad": 0}, layer_norm_hidden_state=False, model_class=Gemma2_2B, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options)
class Gemma3_4BModel(sd1_clip.SDClipModel):
def __init__(self, device="cpu", layer="hidden", layer_idx=-2, dtype=None, attention_mask=True, model_options={}):
super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"start": 2, "pad": 0}, layer_norm_hidden_state=False, model_class=comfy.text_encoders.llama.Gemma3_4B, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options)
class LuminaModel(sd1_clip.SD1ClipModel):
def __init__(self, device="cpu", dtype=None, model_options=None):
def __init__(self, device="cpu", dtype=None, model_options=None, name="gemma2_2b", clip_model=Gemma2_2BModel):
if model_options is None:
model_options = {}
super().__init__(device=device, dtype=dtype, name="gemma2_2b", clip_model=Gemma2_2BModel, model_options=model_options)
super().__init__(device=device, dtype=dtype, name=name, clip_model=clip_model, model_options=model_options)
def te(dtype_llama=None, llama_scaled_fp8=None):
def te(dtype_llama=None, llama_scaled_fp8=None, model_type="gemma2_2b"):
if model_type == "gemma2_2b":
model = Gemma2_2BModel
elif model_type == "gemma3_4b":
model = Gemma3_4BModel
class LuminaTEModel_(LuminaModel):
def __init__(self, device="cpu", dtype=None, model_options=None):
if model_options is None:
@ -47,6 +64,5 @@ def te(dtype_llama=None, llama_scaled_fp8=None):
model_options["scaled_fp8"] = llama_scaled_fp8
if dtype_llama is not None:
dtype = dtype_llama
super().__init__(device=device, dtype=dtype, model_options=model_options)
super().__init__(device=device, dtype=dtype, name=model_type, model_options=model_options, clip_model=model)
return LuminaTEModel_

View File

@ -24,13 +24,22 @@ class QwenImageTokenizer(sd1_clip.SD1Tokenizer):
self.llama_template_images = "<|im_start|>system\nDescribe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>\n<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>{}<|im_end|>\n<|im_start|>assistant\n"
def tokenize_with_weights(self, text, return_word_ids=False, llama_template=None, images=[], **kwargs):
if llama_template is None:
if len(images) > 0:
llama_text = self.llama_template_images.format(text)
else:
llama_text = self.llama_template.format(text)
skip_template = False
if text.startswith('<|im_start|>'):
skip_template = True
if text.startswith('<|start_header_id|>'):
skip_template = True
if skip_template:
llama_text = text
else:
llama_text = llama_template.format(text)
if llama_template is None:
if len(images) > 0:
llama_text = self.llama_template_images.format(text)
else:
llama_text = self.llama_template.format(text)
else:
llama_text = llama_template.format(text)
tokens = super().tokenize_with_weights(llama_text, return_word_ids=return_word_ids, disable_weights=True, **kwargs)
key_name = next(iter(tokens))
embed_count = 0
@ -58,23 +67,23 @@ class QwenImageTEModel(sd1_clip.SD1ClipModel):
model_options = {}
super().__init__(device=device, dtype=dtype, name="qwen25_7b", clip_model=Qwen25_7BVLIModel, model_options=model_options)
def encode_token_weights(self, token_weight_pairs):
def encode_token_weights(self, token_weight_pairs, template_end=-1):
out, pooled, extra = super().encode_token_weights(token_weight_pairs)
tok_pairs = token_weight_pairs["qwen25_7b"][0]
count_im_start = 0
template_end = 0
for i, v in enumerate(tok_pairs):
elem = v[0]
if not torch.is_tensor(elem):
if isinstance(elem, numbers.Integral):
if elem == 151644 and count_im_start < 2:
template_end = i
count_im_start += 1
if template_end == -1:
for i, v in enumerate(tok_pairs):
elem = v[0]
if not torch.is_tensor(elem):
if isinstance(elem, numbers.Integral):
if elem == 151644 and count_im_start < 2:
template_end = i
count_im_start += 1
if out.shape[1] > (template_end + 3):
if tok_pairs[template_end + 1][0] == 872:
if tok_pairs[template_end + 2][0] == 198:
template_end += 3
if out.shape[1] > (template_end + 3):
if tok_pairs[template_end + 1][0] == 872:
if tok_pairs[template_end + 2][0] == 198:
template_end += 3
out = out[:, template_end:]
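
A standalone sketch of the auto-detection path above, assuming plain integer token ids (the real code walks (token, weight) pairs from the tokenizer); the token id meanings are taken from the hunk itself:

def find_template_end(token_ids: list[int]) -> int:
    template_end = 0
    count_im_start = 0
    for i, tok_id in enumerate(token_ids):
        if tok_id == 151644 and count_im_start < 2:  # 151644 == "<|im_start|>"
            template_end = i
            count_im_start += 1
    # Skip the "user" (872) and newline (198) tokens that follow the template marker.
    if len(token_ids) > template_end + 3:
        if token_ids[template_end + 1] == 872 and token_ids[template_end + 2] == 198:
            template_end += 3
    return template_end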

View File

@ -68,6 +68,12 @@ if hasattr(torch.serialization, "add_safe_globals"): # TODO: this was added in
ModelCheckpoint.__module__ = "pytorch_lightning.callbacks.model_checkpoint"
# TODO: upstream had a patch here for scalar; ours seems to work better, so I'm not sure it's needed
# def scalar(*args, **kwargs):
# from numpy.core.multiarray import scalar as sc
# return sc(*args, **kwargs)
# scalar.__module__ = "numpy.core.multiarray"
try:
from numpy.core.multiarray import scalar # pylint: disable=no-name-in-module
except (ImportError, ModuleNotFoundError):

View File

@ -8,8 +8,8 @@ from comfy_api.internal.async_to_sync import create_sync_class
from comfy_api.latest._input import ImageInput, AudioInput, MaskInput, LatentInput, VideoInput
from comfy_api.latest._input_impl import VideoFromFile, VideoFromComponents
from comfy_api.latest._util import VideoCodec, VideoContainer, VideoComponents
from comfy_api.latest._io import _IO as io #noqa: F401
from comfy_api.latest._ui import _UI as ui #noqa: F401
from . import _io as io
from . import _ui as ui
# from comfy_api.latest._resources import _RESOURCES as resources #noqa: F401
from comfy_execution.utils import get_executing_context
from comfy_execution.progress import get_progress_state, PreviewImageTuple
@ -114,6 +114,10 @@ if TYPE_CHECKING:
ComfyAPISync: Type[comfy_api.latest.generated.ComfyAPISyncStub.ComfyAPISyncStub]
ComfyAPISync = create_sync_class(ComfyAPI_latest)
# create new aliases for io and ui
IO = io
UI = ui
__all__ = [
"ComfyAPI",
"ComfyAPISync",
@ -122,4 +126,7 @@ __all__ = [
"Types",
"ComfyExtension",
"io",
"IO",
"ui",
"UI",
]

View File

@ -1,6 +1,6 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Optional, Union
from typing import Optional, Union, IO
import io
import av
from comfy_api.util import VideoContainer, VideoCodec, VideoComponents
@ -23,7 +23,7 @@ class VideoInput(ABC):
@abstractmethod
def save_to(
self,
path: str,
path: Union[str, IO[bytes]],
format: VideoContainer = VideoContainer.AUTO,
codec: VideoCodec = VideoCodec.AUTO,
metadata: Optional[dict] = None
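
A brief sketch of the widened signature, assuming `video` is any VideoInput implementation (for example VideoFromFile); the buffer name is illustrative:

import io

buffer = io.BytesIO()
video.save_to(buffer)   # format/codec default to AUTO; a str path still works as before
buffer.seek(0)          # rewind before reusing the in-memory bytes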

View File

@ -336,11 +336,25 @@ class Combo(ComfyTypeIO):
class Input(WidgetInput):
"""Combo input (dropdown)."""
Type = str
def __init__(self, id: str, options: list[str]=None, display_name: str=None, optional=False, tooltip: str=None, lazy: bool=None,
default: str=None, control_after_generate: bool=None,
upload: UploadType=None, image_folder: FolderType=None,
remote: RemoteOptions=None,
socketless: bool=None):
def __init__(
self,
id: str,
options: list[str] | list[int] | type[Enum] = None,
display_name: str=None,
optional=False,
tooltip: str=None,
lazy: bool=None,
default: str | int | Enum = None,
control_after_generate: bool=None,
upload: UploadType=None,
image_folder: FolderType=None,
remote: RemoteOptions=None,
socketless: bool=None,
):
if isinstance(options, type) and issubclass(options, Enum):
options = [v.value for v in options]
if isinstance(default, Enum):
default = default.value
super().__init__(id, display_name, optional, tooltip, lazy, default, socketless)
self.multiselect = False
self.options = options
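
A hedged usage sketch of the new Enum handling, mirroring how the API nodes later in this diff call it; the ModelName enum is illustrative only:

from enum import Enum
from comfy_api.latest import IO

class ModelName(str, Enum):
    base = "model-base"
    turbo = "model-turbo"

widget = IO.Combo.Input("model", options=ModelName, default=ModelName.base)
# The Enum class is expanded to ["model-base", "model-turbo"] and the Enum default
# is converted to its .value before being handed to the WidgetInput base class.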
@ -1570,77 +1584,78 @@ class _UIOutput(ABC):
...
class _IO:
FolderType = FolderType
UploadType = UploadType
RemoteOptions = RemoteOptions
NumberDisplay = NumberDisplay
__all__ = [
"FolderType",
"UploadType",
"RemoteOptions",
"NumberDisplay",
comfytype = staticmethod(comfytype)
Custom = staticmethod(Custom)
Input = Input
WidgetInput = WidgetInput
Output = Output
ComfyTypeI = ComfyTypeI
ComfyTypeIO = ComfyTypeIO
#---------------------------------
"comfytype",
"Custom",
"Input",
"WidgetInput",
"Output",
"ComfyTypeI",
"ComfyTypeIO",
# Supported Types
Boolean = Boolean
Int = Int
Float = Float
String = String
Combo = Combo
MultiCombo = MultiCombo
Image = Image
WanCameraEmbedding = WanCameraEmbedding
Webcam = Webcam
Mask = Mask
Latent = Latent
Conditioning = Conditioning
Sampler = Sampler
Sigmas = Sigmas
Noise = Noise
Guider = Guider
Clip = Clip
ControlNet = ControlNet
Vae = Vae
Model = Model
ClipVision = ClipVision
ClipVisionOutput = ClipVisionOutput
AudioEncoderOutput = AudioEncoderOutput
StyleModel = StyleModel
Gligen = Gligen
UpscaleModel = UpscaleModel
Audio = Audio
Video = Video
SVG = SVG
LoraModel = LoraModel
LossMap = LossMap
Voxel = Voxel
Mesh = Mesh
Hooks = Hooks
HookKeyframes = HookKeyframes
TimestepsRange = TimestepsRange
LatentOperation = LatentOperation
FlowControl = FlowControl
Accumulation = Accumulation
Load3DCamera = Load3DCamera
Load3D = Load3D
Load3DAnimation = Load3DAnimation
Photomaker = Photomaker
Point = Point
FaceAnalysis = FaceAnalysis
BBOX = BBOX
SEGS = SEGS
AnyType = AnyType
MultiType = MultiType
#---------------------------------
HiddenHolder = HiddenHolder
Hidden = Hidden
NodeInfoV1 = NodeInfoV1
NodeInfoV3 = NodeInfoV3
Schema = Schema
ComfyNode = ComfyNode
NodeOutput = NodeOutput
add_to_dict_v1 = staticmethod(add_to_dict_v1)
add_to_dict_v3 = staticmethod(add_to_dict_v3)
"Boolean",
"Int",
"Float",
"String",
"Combo",
"MultiCombo",
"Image",
"WanCameraEmbedding",
"Webcam",
"Mask",
"Latent",
"Conditioning",
"Sampler",
"Sigmas",
"Noise",
"Guider",
"Clip",
"ControlNet",
"Vae",
"Model",
"ClipVision",
"ClipVisionOutput",
"AudioEncoder",
"AudioEncoderOutput",
"StyleModel",
"Gligen",
"UpscaleModel",
"Audio",
"Video",
"SVG",
"LoraModel",
"LossMap",
"Voxel",
"Mesh",
"Hooks",
"HookKeyframes",
"TimestepsRange",
"LatentOperation",
"FlowControl",
"Accumulation",
"Load3DCamera",
"Load3D",
"Load3DAnimation",
"Photomaker",
"Point",
"FaceAnalysis",
"BBOX",
"SEGS",
"AnyType",
"MultiType",
# Other classes
"HiddenHolder",
"Hidden",
"NodeInfoV1",
"NodeInfoV3",
"Schema",
"ComfyNode",
"NodeOutput",
"add_to_dict_v1",
"add_to_dict_v3",
]

View File

@ -455,15 +455,16 @@ class PreviewText(_UIOutput):
return {"text": (self.value,)}
class _UI:
SavedResult = SavedResult
SavedImages = SavedImages
SavedAudios = SavedAudios
ImageSaveHelper = ImageSaveHelper
AudioSaveHelper = AudioSaveHelper
PreviewImage = PreviewImage
PreviewMask = PreviewMask
PreviewAudio = PreviewAudio
PreviewVideo = PreviewVideo
PreviewUI3D = PreviewUI3D
PreviewText = PreviewText
__all__ = [
"SavedResult",
"SavedImages",
"SavedAudios",
"ImageSaveHelper",
"AudioSaveHelper",
"PreviewImage",
"PreviewMask",
"PreviewAudio",
"PreviewVideo",
"PreviewUI3D",
"PreviewText",
]

View File

@ -3,6 +3,7 @@ import aiohttp
import io
import logging
import mimetypes
import os
from typing import Optional, Union
from comfy.utils import common_upscale
from comfy_api.input_impl import VideoFromFile
@ -18,7 +19,7 @@ from comfy_api_nodes.apis.client import (
UploadResponse,
)
from comfy.cmd.server import PromptServer
from comfy.cli_args import args
import numpy as np
from PIL import Image
@ -30,7 +31,9 @@ from io import BytesIO
import av
async def download_url_to_video_output(video_url: str, timeout: int = None) -> VideoFromFile:
async def download_url_to_video_output(
video_url: str, timeout: int = None, auth_kwargs: Optional[dict[str, str]] = None
) -> VideoFromFile:
"""Downloads a video from a URL and returns a `VIDEO` output.
Args:
@ -39,7 +42,7 @@ async def download_url_to_video_output(video_url: str, timeout: int = None) -> V
Returns:
A Comfy node `VIDEO` output.
"""
video_io = await download_url_to_bytesio(video_url, timeout)
video_io = await download_url_to_bytesio(video_url, timeout, auth_kwargs=auth_kwargs)
if video_io is None:
error_msg = f"Failed to download video from {video_url}"
logging.error(error_msg)
@ -152,7 +155,7 @@ def validate_aspect_ratio(
raise TypeError(
f"Aspect ratio cannot reduce to any less than {minimum_ratio_str} ({minimum_ratio}), but was {aspect_ratio} ({calculated_ratio})."
)
elif calculated_ratio > maximum_ratio:
if calculated_ratio > maximum_ratio:
raise TypeError(
f"Aspect ratio cannot reduce to any greater than {maximum_ratio_str} ({maximum_ratio}), but was {aspect_ratio} ({calculated_ratio})."
)
@ -164,7 +167,9 @@ def mimetype_to_extension(mime_type: str) -> str:
return mime_type.split("/")[-1].lower()
async def download_url_to_bytesio(url: str, timeout: int = None) -> BytesIO:
async def download_url_to_bytesio(
url: str, timeout: int = None, auth_kwargs: Optional[dict[str, str]] = None
) -> BytesIO:
"""Downloads content from a URL using requests and returns it as BytesIO.
Args:
@ -174,9 +179,18 @@ async def download_url_to_bytesio(url: str, timeout: int = None) -> BytesIO:
Returns:
BytesIO object containing the downloaded content.
"""
headers = {}
if url.startswith("/proxy/"):
url = str(args.comfy_api_base).rstrip("/") + url
auth_token = auth_kwargs.get("auth_token")
comfy_api_key = auth_kwargs.get("comfy_api_key")
if auth_token:
headers["Authorization"] = f"Bearer {auth_token}"
elif comfy_api_key:
headers["X-API-KEY"] = comfy_api_key
timeout_cfg = aiohttp.ClientTimeout(total=timeout) if timeout else None
async with aiohttp.ClientSession(timeout=timeout_cfg) as session:
async with session.get(url) as resp:
async with session.get(url, headers=headers) as resp:
resp.raise_for_status() # Raises HTTPError for bad responses (4XX or 5XX)
return BytesIO(await resp.read())
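
A hedged usage sketch of the auth-aware download path, assuming it is awaited inside an async API-node method; the URL and token values are placeholders:

data = await download_url_to_bytesio(
    "/proxy/vertexai/files/abc123",   # relative proxy URLs are resolved against comfy_api_base
    timeout=60,
    auth_kwargs={"auth_token": "<comfy-org-auth-token>"},  # or {"comfy_api_key": "..."}
)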
@ -256,7 +270,7 @@ def tensor_to_bytesio(
mime_type: Target image MIME type (e.g., 'image/png', 'image/jpeg', 'image/webp', 'video/mp4').
Returns:
Named BytesIO object containing the image data.
Named BytesIO object containing the image data, with the pointer set to the start of the buffer.
"""
if not mime_type:
mime_type = "image/png"
@ -418,7 +432,7 @@ async def upload_video_to_comfyapi(
f"Video duration ({actual_duration:.2f}s) exceeds the maximum allowed ({max_duration}s)."
)
except Exception as e:
logging.error(f"Error getting video duration: {e}")
logging.error("Error getting video duration: %s", str(e))
raise ValueError(f"Could not verify video duration from source: {e}") from e
upload_mime_type = f"video/{container.value.lower()}"
@ -689,3 +703,16 @@ def image_tensor_pair_to_batch(
"center",
).movedim(1, -1)
return torch.cat((image1, image2), dim=0)
def get_size(path_or_object: Union[str, io.BytesIO]) -> int:
if isinstance(path_or_object, str):
return os.path.getsize(path_or_object)
return len(path_or_object.getvalue())
def validate_container_format_is_mp4(video: VideoInput) -> None:
"""Validates video container format is MP4."""
container_format = video.get_container_format()
if container_format not in ["mp4", "mov,mp4,m4a,3gp,3g2,mj2"]:
raise ValueError(f"Only MP4 container format supported. Got: {container_format}")

View File

@ -2,6 +2,7 @@
# filename: filtered-openapi.yaml
# timestamp: 2025-07-30T08:54:00+00:00
# pylint: disable
from __future__ import annotations
from datetime import date, datetime
@ -1320,6 +1321,7 @@ class KlingTextToVideoModelName(str, Enum):
kling_v1 = 'kling-v1'
kling_v1_6 = 'kling-v1-6'
kling_v2_1_master = 'kling-v2-1-master'
kling_v2_5_turbo = 'kling-v2-5-turbo'
class KlingVideoGenAspectRatio(str, Enum):
@ -1354,6 +1356,7 @@ class KlingVideoGenModelName(str, Enum):
kling_v2_master = 'kling-v2-master'
kling_v2_1 = 'kling-v2-1'
kling_v2_1_master = 'kling-v2-1-master'
kling_v2_5_turbo = 'kling-v2-5-turbo'
class KlingVideoResult(BaseModel):

View File

@ -95,9 +95,10 @@ import aiohttp
import asyncio
import logging
import io
import os
import socket
from aiohttp.client_exceptions import ClientError, ClientResponseError
from typing import Dict, Type, Optional, Any, TypeVar, Generic, Callable, Tuple
from typing import Type, Optional, Any, TypeVar, Generic, Callable
from enum import Enum
import json
from urllib.parse import urljoin, urlparse
@ -174,7 +175,7 @@ class ApiClient:
max_retries: int = 3,
retry_delay: float = 1.0,
retry_backoff_factor: float = 2.0,
retry_status_codes: Optional[Tuple[int, ...]] = None,
retry_status_codes: Optional[tuple[int, ...]] = None,
session: Optional[aiohttp.ClientSession] = None,
):
self.base_url = base_url
@ -198,9 +199,9 @@ class ApiClient:
@staticmethod
def _create_json_payload_args(
data: Optional[Dict[str, Any]] = None,
headers: Optional[Dict[str, str]] = None,
) -> Dict[str, Any]:
data: Optional[dict[str, Any]] = None,
headers: Optional[dict[str, str]] = None,
) -> dict[str, Any]:
return {
"json": data,
"headers": headers,
@ -208,24 +209,27 @@ class ApiClient:
def _create_form_data_args(
self,
data: Dict[str, Any] | None,
files: Dict[str, Any] | None,
headers: Optional[Dict[str, str]] = None,
data: dict[str, Any] | None,
files: dict[str, Any] | None,
headers: Optional[dict[str, str]] = None,
multipart_parser: Callable | None = None,
) -> Dict[str, Any]:
) -> dict[str, Any]:
if headers and "Content-Type" in headers:
del headers["Content-Type"]
if multipart_parser and data:
data = multipart_parser(data)
form = aiohttp.FormData(default_to_multipart=True)
if data: # regular text fields
for k, v in data.items():
if v is None:
continue # aiohttp fails to serialize "None" values
# aiohttp expects strings or bytes; convert enums etc.
form.add_field(k, str(v) if not isinstance(v, (bytes, bytearray)) else v)
if isinstance(data, aiohttp.FormData):
form = data # If the parser already returned a FormData, pass it through
else:
form = aiohttp.FormData(default_to_multipart=True)
if data: # regular text fields
for k, v in data.items():
if v is None:
continue # aiohttp fails to serialize "None" values
# aiohttp expects strings or bytes; convert enums etc.
form.add_field(k, str(v) if not isinstance(v, (bytes, bytearray)) else v)
if files:
file_iter = files if isinstance(files, list) else files.items()
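
A hedged sketch of a multipart_parser that takes advantage of the new pass-through: if it already returns an aiohttp.FormData, the client uses it as-is instead of rebuilding the form; the field handling below mirrors the surrounding code:

import aiohttp

def example_multipart_parser(data: dict) -> aiohttp.FormData:
    form = aiohttp.FormData(default_to_multipart=True)
    for key, value in data.items():
        if value is None:
            continue  # aiohttp fails to serialize "None" values
        form.add_field(key, value if isinstance(value, (bytes, bytearray)) else str(value))
    return form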
@ -250,9 +254,9 @@ class ApiClient:
@staticmethod
def _create_urlencoded_form_data_args(
data: Dict[str, Any],
headers: Optional[Dict[str, str]] = None,
) -> Dict[str, Any]:
data: dict[str, Any],
headers: Optional[dict[str, str]] = None,
) -> dict[str, Any]:
headers = headers or {}
headers["Content-Type"] = "application/x-www-form-urlencoded"
return {
@ -260,7 +264,7 @@ class ApiClient:
"headers": headers,
}
def get_headers(self) -> Dict[str, str]:
def get_headers(self) -> dict[str, str]:
"""Get headers for API requests, including authentication if available"""
headers = {"Content-Type": "application/json", "Accept": "application/json"}
@ -271,7 +275,7 @@ class ApiClient:
return headers
async def _check_connectivity(self, target_url: str) -> Dict[str, bool]:
async def _check_connectivity(self, target_url: str) -> dict[str, bool]:
"""
Check connectivity to determine if network issues are local or server-related.
@ -312,14 +316,14 @@ class ApiClient:
self,
method: str,
path: str,
params: Optional[Dict[str, Any]] = None,
data: Optional[Dict[str, Any]] = None,
files: Optional[Dict[str, Any] | list[tuple[str, Any]]] = None,
headers: Optional[Dict[str, str]] = None,
params: Optional[dict[str, Any]] = None,
data: Optional[dict[str, Any]] = None,
files: Optional[dict[str, Any] | list[tuple[str, Any]]] = None,
headers: Optional[dict[str, str]] = None,
content_type: str = "application/json",
multipart_parser: Callable | None = None,
retry_count: int = 0, # Used internally for tracking retries
) -> Dict[str, Any]:
) -> dict[str, Any]:
"""
Make an HTTP request to the API with automatic retries for transient errors.
@ -355,10 +359,10 @@ class ApiClient:
if params:
params = {k: v for k, v in params.items() if v is not None} # aiohttp fails to serialize None values
logging.debug(f"[DEBUG] Request Headers: {request_headers}")
logging.debug(f"[DEBUG] Files: {files}")
logging.debug(f"[DEBUG] Params: {params}")
logging.debug(f"[DEBUG] Data: {data}")
logging.debug("[DEBUG] Request Headers: %s", request_headers)
logging.debug("[DEBUG] Files: %s", files)
logging.debug("[DEBUG] Params: %s", params)
logging.debug("[DEBUG] Data: %s", data)
if content_type == "application/x-www-form-urlencoded":
payload_args = self._create_urlencoded_form_data_args(data or {}, request_headers)
@ -481,7 +485,7 @@ class ApiClient:
retry_delay: Initial delay between retries in seconds
retry_backoff_factor: Multiplier for the delay after each retry
"""
headers: Dict[str, str] = {}
headers: dict[str, str] = {}
skip_auto_headers: set[str] = set()
if content_type:
headers["Content-Type"] = content_type
@ -499,7 +503,9 @@ class ApiClient:
else:
raise ValueError("File must be BytesIO or str path")
operation_id = f"upload_{upload_url.split('/')[-1]}_{uuid.uuid4().hex[:8]}"
parsed = urlparse(upload_url)
basename = os.path.basename(parsed.path) or parsed.netloc or "upload"
operation_id = f"upload_{basename}_{uuid.uuid4().hex[:8]}"
request_logger.log_request_response(
operation_id=operation_id,
request_method="PUT",
@ -532,7 +538,7 @@ class ApiClient:
request_method="PUT",
request_url=upload_url,
response_status_code=e.status if hasattr(e, "status") else None,
response_headers=dict(e.headers) if getattr(e, "headers") else None, # pylint: disable=no-member
response_headers=dict(e.headers) if hasattr(e, "headers") else None, # pylint: disable=no-member
response_content=None,
error_message=f"{type(e).__name__}: {str(e)}",
)
@ -552,7 +558,7 @@ class ApiClient:
*req_meta,
retry_count: int,
response_content: dict | str = "",
) -> Dict[str, Any]:
) -> dict[str, Any]:
status_code = exc.status
if status_code == 401:
user_friendly = "Unauthorized: Please login first to use this node."
@ -586,9 +592,9 @@ class ApiClient:
error_message=f"HTTP Error {exc.status}",
)
logging.debug(f"[DEBUG] API Error: {user_friendly} (Status: {status_code})")
logging.debug("[DEBUG] API Error: %s (Status: %s)", user_friendly, status_code)
if response_content:
logging.debug(f"[DEBUG] Response content: {response_content}")
logging.debug("[DEBUG] Response content: %s", response_content)
# Retry if eligible
if status_code in self.retry_status_codes and retry_count < self.max_retries:
@ -653,7 +659,7 @@ class ApiEndpoint(Generic[T, R]):
method: HttpMethod,
request_model: Type[T],
response_model: Type[R],
query_params: Optional[Dict[str, Any]] = None,
query_params: Optional[dict[str, Any]] = None,
):
"""Initialize an API endpoint definition.
@ -678,11 +684,11 @@ class SynchronousOperation(Generic[T, R]):
self,
endpoint: ApiEndpoint[T, R],
request: T,
files: Optional[Dict[str, Any] | list[tuple[str, Any]]] = None,
files: Optional[dict[str, Any] | list[tuple[str, Any]]] = None,
api_base: str | None = None,
auth_token: Optional[str] = None,
comfy_api_key: Optional[str] = None,
auth_kwargs: Optional[Dict[str, str]] = None,
auth_kwargs: Optional[dict[str, str]] = None,
timeout: float = 7200.0,
verify_ssl: bool = True,
content_type: str = "application/json",
@ -723,7 +729,7 @@ class SynchronousOperation(Generic[T, R]):
)
try:
request_dict: Optional[Dict[str, Any]]
request_dict: Optional[dict[str, Any]]
if isinstance(self.request, EmptyRequest):
request_dict = None
else:
@ -732,11 +738,9 @@ class SynchronousOperation(Generic[T, R]):
if isinstance(v, Enum):
request_dict[k] = v.value
logging.debug(
f"[DEBUG] API Request: {self.endpoint.method.value} {self.endpoint.path}"
)
logging.debug(f"[DEBUG] Request Data: {json.dumps(request_dict, indent=2)}")
logging.debug(f"[DEBUG] Query Params: {self.endpoint.query_params}")
logging.debug("[DEBUG] API Request: %s %s", self.endpoint.method.value, self.endpoint.path)
logging.debug("[DEBUG] Request Data: %s", json.dumps(request_dict, indent=2))
logging.debug("[DEBUG] Query Params: %s", self.endpoint.query_params)
response_json = await client.request(
self.endpoint.method.value,
@ -751,11 +755,11 @@ class SynchronousOperation(Generic[T, R]):
logging.debug("=" * 50)
logging.debug("[DEBUG] RESPONSE DETAILS:")
logging.debug("[DEBUG] Status Code: 200 (Success)")
logging.debug(f"[DEBUG] Response Body: {json.dumps(response_json, indent=2)}")
logging.debug("[DEBUG] Response Body: %s", json.dumps(response_json, indent=2))
logging.debug("=" * 50)
parsed_response = self.endpoint.response_model.model_validate(response_json)
logging.debug(f"[DEBUG] Parsed Response: {parsed_response}")
logging.debug("[DEBUG] Parsed Response: %s", parsed_response)
return parsed_response
finally:
if owns_client:
@ -778,14 +782,16 @@ class PollingOperation(Generic[T, R]):
poll_endpoint: ApiEndpoint[EmptyRequest, R],
completed_statuses: list[str],
failed_statuses: list[str],
status_extractor: Callable[[R], str],
progress_extractor: Callable[[R], float] | None = None,
result_url_extractor: Callable[[R], str] | None = None,
*,
status_extractor: Callable[[R], Optional[str]],
progress_extractor: Callable[[R], Optional[float]] | None = None,
result_url_extractor: Callable[[R], Optional[str]] | None = None,
price_extractor: Callable[[R], Optional[float]] | None = None,
request: Optional[T] = None,
api_base: str | None = None,
auth_token: Optional[str] = None,
comfy_api_key: Optional[str] = None,
auth_kwargs: Optional[Dict[str, str]] = None,
auth_kwargs: Optional[dict[str, str]] = None,
poll_interval: float = 5.0,
max_poll_attempts: int = 120, # Default max polling attempts (10 minutes with 5s interval)
max_retries: int = 3, # Max retries per individual API call
@ -811,10 +817,12 @@ class PollingOperation(Generic[T, R]):
self.status_extractor = status_extractor or (lambda x: getattr(x, "status", None))
self.progress_extractor = progress_extractor
self.result_url_extractor = result_url_extractor
self.price_extractor = price_extractor
self.node_id = node_id
self.completed_statuses = completed_statuses
self.failed_statuses = failed_statuses
self.final_response: Optional[R] = None
self.extracted_price: Optional[float] = None
async def execute(self, client: Optional[ApiClient] = None) -> R:
owns_client = client is None
@ -836,6 +844,8 @@ class PollingOperation(Generic[T, R]):
def _display_text_on_node(self, text: str):
if not self.node_id:
return
if self.extracted_price is not None:
text = f"Price: ${self.extracted_price}\n{text}"
PromptServer.instance.send_progress_text(text, self.node_id)
def _display_time_progress_on_node(self, time_completed: int | float):
@ -871,18 +881,19 @@ class PollingOperation(Generic[T, R]):
status = TaskStatus.PENDING
for poll_count in range(1, self.max_poll_attempts + 1):
try:
logging.debug(f"[DEBUG] Polling attempt #{poll_count}")
logging.debug("[DEBUG] Polling attempt #%s", poll_count)
request_dict = (
None if self.request is None else self.request.model_dump(exclude_none=True)
)
request_dict = None if self.request is None else self.request.model_dump(exclude_none=True)
if poll_count == 1:
logging.debug(
f"[DEBUG] Poll Request: {self.poll_endpoint.method.value} {self.poll_endpoint.path}"
"[DEBUG] Poll Request: %s %s",
self.poll_endpoint.method.value,
self.poll_endpoint.path,
)
logging.debug(
f"[DEBUG] Poll Request Data: {json.dumps(request_dict, indent=2) if request_dict else 'None'}"
"[DEBUG] Poll Request Data: %s",
json.dumps(request_dict, indent=2) if request_dict else "None",
)
# Query task status
@ -897,7 +908,7 @@ class PollingOperation(Generic[T, R]):
# Check if task is complete
status = self._check_task_status(response_obj)
logging.debug(f"[DEBUG] Task Status: {status}")
logging.debug("[DEBUG] Task Status: %s", status)
# If progress extractor is provided, extract progress
if self.progress_extractor:
@ -905,13 +916,18 @@ class PollingOperation(Generic[T, R]):
if new_progress is not None:
progress.update_absolute(new_progress, total=PROGRESS_BAR_MAX)
if self.price_extractor:
price = self.price_extractor(response_obj)
if price is not None:
self.extracted_price = price
if status == TaskStatus.COMPLETED:
message = "Task completed successfully"
if self.result_url_extractor:
result_url = self.result_url_extractor(response_obj)
if result_url:
message = f"Result URL: {result_url}"
logging.debug(f"[DEBUG] {message}")
logging.debug("[DEBUG] %s", message)
self._display_text_on_node(message)
self.final_response = response_obj
if self.progress_extractor:
@ -919,7 +935,7 @@ class PollingOperation(Generic[T, R]):
return self.final_response
if status == TaskStatus.FAILED:
message = f"Task failed: {json.dumps(resp)}"
logging.error(f"[DEBUG] {message}")
logging.error("[DEBUG] %s", message)
raise Exception(message)
logging.debug("[DEBUG] Task still pending, continuing to poll...")
# Task pending wait
@ -933,7 +949,12 @@ class PollingOperation(Generic[T, R]):
raise Exception(
f"Polling aborted after {consecutive_errors} network errors: {str(e)}"
) from e
logging.warning("Network error (%s/%s): %s", consecutive_errors, max_consecutive_errors, str(e))
logging.warning(
"Network error (%s/%s): %s",
consecutive_errors,
max_consecutive_errors,
str(e),
)
await asyncio.sleep(self.poll_interval)
except Exception as e:
# For other errors, increment count and potentially abort
@ -943,10 +964,13 @@ class PollingOperation(Generic[T, R]):
f"Polling aborted after {consecutive_errors} consecutive errors: {str(e)}"
) from e
logging.error(f"[DEBUG] Polling error: {str(e)}")
logging.error("[DEBUG] Polling error: %s", str(e))
logging.warning(
f"Error during polling (attempt {poll_count}/{self.max_poll_attempts}): {str(e)}. "
f"Will retry in {self.poll_interval} seconds."
"Error during polling (attempt %s/%s): %s. Will retry in %s seconds.",
poll_count,
self.max_poll_attempts,
str(e),
self.poll_interval,
)
await asyncio.sleep(self.poll_interval)
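
A hedged sketch of wiring the new keyword-only extractors, including price_extractor, into a PollingOperation; the response model, its field names, and the auth/task variables are illustrative, not taken from a specific API:

operation = PollingOperation(
    poll_endpoint=ApiEndpoint(
        path=f"/proxy/example/tasks/{task_id}",   # task_id is a placeholder
        method=HttpMethod.GET,
        request_model=EmptyRequest,
        response_model=ExampleTaskStatus,         # hypothetical pydantic model
    ),
    completed_statuses=["succeeded"],
    failed_statuses=["failed", "canceled"],
    status_extractor=lambda r: r.status,
    progress_extractor=lambda r: r.progress,
    result_url_extractor=lambda r: r.result_url,
    price_extractor=lambda r: r.price_usd,        # surfaced as "Price: $..." in the node text
    auth_kwargs=auth_kwargs,
    node_id=node_id,
)
response = await operation.execute()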

View File

@ -1,19 +1,22 @@
from __future__ import annotations
from typing import List, Optional
from typing import Optional
from comfy_api_nodes.apis import GeminiGenerationConfig, GeminiContent, GeminiSafetySetting, GeminiSystemInstructionContent, GeminiTool, GeminiVideoMetadata
from pydantic import BaseModel
class GeminiImageConfig(BaseModel):
aspectRatio: Optional[str] = None
class GeminiImageGenerationConfig(GeminiGenerationConfig):
responseModalities: Optional[List[str]] = None
responseModalities: Optional[list[str]] = None
imageConfig: Optional[GeminiImageConfig] = None
class GeminiImageGenerateContentRequest(BaseModel):
contents: List[GeminiContent]
contents: list[GeminiContent]
generationConfig: Optional[GeminiImageGenerationConfig] = None
safetySettings: Optional[List[GeminiSafetySetting]] = None
safetySettings: Optional[list[GeminiSafetySetting]] = None
systemInstruction: Optional[GeminiSystemInstructionContent] = None
tools: Optional[List[GeminiTool]] = None
tools: Optional[list[GeminiTool]] = None
videoMetadata: Optional[GeminiVideoMetadata] = None

View File

@ -0,0 +1,100 @@
from typing import Optional
from enum import Enum
from pydantic import BaseModel, Field
class Pikaffect(str, Enum):
Cake_ify = "Cake-ify"
Crumble = "Crumble"
Crush = "Crush"
Decapitate = "Decapitate"
Deflate = "Deflate"
Dissolve = "Dissolve"
Explode = "Explode"
Eye_pop = "Eye-pop"
Inflate = "Inflate"
Levitate = "Levitate"
Melt = "Melt"
Peel = "Peel"
Poke = "Poke"
Squish = "Squish"
Ta_da = "Ta-da"
Tear = "Tear"
class PikaBodyGenerate22C2vGenerate22PikascenesPost(BaseModel):
aspectRatio: Optional[float] = Field(None, description='Aspect ratio (width / height)')
duration: Optional[int] = Field(5)
ingredientsMode: str = Field(...)
negativePrompt: Optional[str] = Field(None)
promptText: Optional[str] = Field(None)
resolution: Optional[str] = Field('1080p')
seed: Optional[int] = Field(None)
class PikaGenerateResponse(BaseModel):
video_id: str = Field(...)
class PikaBodyGenerate22I2vGenerate22I2vPost(BaseModel):
duration: Optional[int] = 5
negativePrompt: Optional[str] = Field(None)
promptText: Optional[str] = Field(None)
resolution: Optional[str] = '1080p'
seed: Optional[int] = Field(None)
class PikaBodyGenerate22KeyframeGenerate22PikaframesPost(BaseModel):
duration: Optional[int] = Field(None, ge=5, le=10)
negativePrompt: Optional[str] = Field(None)
promptText: str = Field(...)
resolution: Optional[str] = '1080p'
seed: Optional[int] = Field(None)
class PikaBodyGenerate22T2vGenerate22T2vPost(BaseModel):
aspectRatio: Optional[float] = Field(
1.7777777777777777,
description='Aspect ratio (width / height)',
ge=0.4,
le=2.5,
)
duration: Optional[int] = 5
negativePrompt: Optional[str] = Field(None)
promptText: str = Field(...)
resolution: Optional[str] = '1080p'
seed: Optional[int] = Field(None)
class PikaBodyGeneratePikadditionsGeneratePikadditionsPost(BaseModel):
negativePrompt: Optional[str] = Field(None)
promptText: Optional[str] = Field(None)
seed: Optional[int] = Field(None)
class PikaBodyGeneratePikaffectsGeneratePikaffectsPost(BaseModel):
negativePrompt: Optional[str] = Field(None)
pikaffect: Optional[str] = None
promptText: Optional[str] = Field(None)
seed: Optional[int] = Field(None)
class PikaBodyGeneratePikaswapsGeneratePikaswapsPost(BaseModel):
negativePrompt: Optional[str] = Field(None)
promptText: Optional[str] = Field(None)
seed: Optional[int] = Field(None)
modifyRegionRoi: Optional[str] = Field(None)
class PikaStatusEnum(str, Enum):
queued = "queued"
started = "started"
finished = "finished"
failed = "failed"
class PikaVideoResponse(BaseModel):
id: str = Field(...)
progress: Optional[int] = Field(None)
status: PikaStatusEnum
url: Optional[str] = Field(None)

View File

@ -5,61 +5,97 @@ import datetime
import json
import logging
from comfy.cmd import folder_paths
import re
import hashlib
from typing import Any
# Get the logger instance
logger = logging.getLogger(__name__)
def get_log_directory():
"""
Ensures the API log directory exists within ComfyUI's temp directory
and returns its path.
"""
"""Ensures the API log directory exists within ComfyUI's temp directory and returns its path."""
base_temp_dir = folder_paths.get_temp_directory()
log_dir = os.path.join(base_temp_dir, "api_logs")
try:
os.makedirs(log_dir, exist_ok=True)
except Exception as e:
logger.error(f"Error creating API log directory {log_dir}: {e}")
logger.error("Error creating API log directory %s: %s", log_dir, str(e))
# Fallback to base temp directory if sub-directory creation fails
return base_temp_dir
return log_dir
def _format_data_for_logging(data):
def _sanitize_filename_component(name: str) -> str:
if not name:
return "log"
sanitized = re.sub(r"[^A-Za-z0-9._-]+", "_", name) # Replace disallowed characters with underscore
sanitized = sanitized.strip(" ._") # Windows: trailing dots or spaces are not allowed
if not sanitized:
sanitized = "log"
return sanitized
def _short_hash(*parts: str, length: int = 10) -> str:
return hashlib.sha1(("|".join(parts)).encode("utf-8")).hexdigest()[:length]
def _build_log_filepath(log_dir: str, operation_id: str, request_url: str) -> str:
"""Build log filepath. We keep it well under common path length limits aiming for <= 240 characters total."""
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S_%f")
slug = _sanitize_filename_component(operation_id) # Best-effort human-readable slug from operation_id
h = _short_hash(operation_id or "", request_url or "") # Short hash ties log to the full operation and URL
# Compute how much room we have for the slug given the directory length
# Keep total path length reasonably below ~260 on Windows.
max_total_path = 240
prefix = f"{timestamp}_"
suffix = f"_{h}.log"
if not slug:
slug = "op"
max_filename_len = max(60, max_total_path - len(log_dir) - 1)
max_slug_len = max(8, max_filename_len - len(prefix) - len(suffix))
if len(slug) > max_slug_len:
slug = slug[:max_slug_len].rstrip(" ._-")
return os.path.join(log_dir, f"{prefix}{slug}{suffix}")
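
An illustrative call with made-up inputs, showing the sanitization and capping described in the docstring above:

p = _build_log_filepath(
    log_dir="/tmp/api_logs",
    operation_id="upload_https://cdn.example.com/some:weird*asset " + "x" * 300,
    request_url="https://cdn.example.com/some/asset.png",
)
# -> roughly /tmp/api_logs/<timestamp>_upload_https_cdn.example.com_some_weird_asset_xxx..._<10-hex>.log
#    with the slug truncated so the whole path stays near the 240-character target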
def _format_data_for_logging(data: Any) -> str:
"""Helper to format data (dict, str, bytes) for logging."""
if isinstance(data, bytes):
try:
return data.decode('utf-8') # Try to decode as text
return data.decode("utf-8") # Try to decode as text
except UnicodeDecodeError:
return f"[Binary data of length {len(data)} bytes]"
elif isinstance(data, (dict, list)):
try:
return json.dumps(data, indent=2, ensure_ascii=False)
except TypeError:
return str(data) # Fallback for non-serializable objects
return str(data) # Fallback for non-serializable objects
return str(data)
def log_request_response(
operation_id: str,
request_method: str,
request_url: str,
request_headers: dict | None = None,
request_params: dict | None = None,
request_data: any = None,
response_status_code: int | None = None,
response_headers: dict | None = None,
response_content: any = None,
error_message: str | None = None
operation_id: str,
request_method: str,
request_url: str,
request_headers: dict | None = None,
request_params: dict | None = None,
request_data: Any = None,
response_status_code: int | None = None,
response_headers: dict | None = None,
response_content: Any = None,
error_message: str | None = None,
):
"""
Logs API request and response details to a file in the temp/api_logs directory.
Filenames are sanitized and length-limited for cross-platform safety.
If we still fail to write, we fall back to appending into api.log.
"""
log_dir = get_log_directory()
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S_%f")
filename = f"{timestamp}_{operation_id.replace('/', '_').replace(':', '_')}.log"
filepath = os.path.join(log_dir, filename)
log_content = []
filepath = _build_log_filepath(log_dir, operation_id, request_url)
log_content: list[str] = []
log_content.append(f"Timestamp: {datetime.datetime.now().isoformat()}")
log_content.append(f"Operation ID: {operation_id}")
log_content.append("-" * 30 + " REQUEST " + "-" * 30)
@ -69,7 +105,7 @@ def log_request_response(
log_content.append(f"Headers:\n{_format_data_for_logging(request_headers)}")
if request_params:
log_content.append(f"Params:\n{_format_data_for_logging(request_params)}")
if request_data:
if request_data is not None:
log_content.append(f"Data/Body:\n{_format_data_for_logging(request_data)}")
log_content.append("\n" + "-" * 30 + " RESPONSE " + "-" * 30)
@ -77,7 +113,7 @@ def log_request_response(
log_content.append(f"Status Code: {response_status_code}")
if response_headers:
log_content.append(f"Headers:\n{_format_data_for_logging(response_headers)}")
if response_content:
if response_content is not None:
log_content.append(f"Content:\n{_format_data_for_logging(response_content)}")
if error_message:
log_content.append(f"Error:\n{error_message}")
@ -85,9 +121,10 @@ def log_request_response(
try:
with open(filepath, "w", encoding="utf-8") as f:
f.write("\n".join(log_content))
logger.debug(f"API log saved to: {filepath}")
logger.debug("API log saved to: %s", filepath)
except Exception as e:
logger.error(f"Error writing API log to {filepath}: {e}")
logger.error("Error writing API log to %s: %s", filepath, str(e))
if __name__ == '__main__':
# Example usage (for testing the logger directly)
@ -100,6 +137,8 @@ if __name__ == '__main__':
p = os.path.join(os.path.dirname(__file__), 'temp_test_logs')
os.makedirs(p, exist_ok=True)
return p
folder_paths = MockFolderPaths()
log_request_response(
@ -123,5 +162,5 @@ if __name__ == '__main__':
request_method="GET",
request_url="https://api.example.com/image.png",
response_status_code=200,
response_content=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR...' # Sample binary data
response_content=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR...' # Sample binary data
)

View File

@ -52,7 +52,3 @@ class RodinResourceItem(BaseModel):
class Rodin3DDownloadResponse(BaseModel):
list: List[RodinResourceItem] = Field(..., description="Source List")

File diff suppressed because it is too large

View File

@ -7,7 +7,7 @@ from typing_extensions import override
import torch
from pydantic import BaseModel, Field
from comfy_api.latest import ComfyExtension, io as comfy_io
from comfy_api.latest import ComfyExtension, IO
from comfy_api_nodes.util.validation_utils import (
validate_image_aspect_ratio_range,
get_number_of_images,
@ -237,33 +237,33 @@ async def poll_until_finished(
).execute()
class ByteDanceImageNode(comfy_io.ComfyNode):
class ByteDanceImageNode(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="ByteDanceImageNode",
display_name="ByteDance Image",
category="api node/image/ByteDance",
description="Generate images using ByteDance models via api based on prompt",
inputs=[
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=[model.value for model in Text2ImageModelName],
default=Text2ImageModelName.seedream_3.value,
options=Text2ImageModelName,
default=Text2ImageModelName.seedream_3,
tooltip="Model name",
),
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
tooltip="The text prompt used to generate the image",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"size_preset",
options=[label for label, _, _ in RECOMMENDED_PRESETS],
tooltip="Pick a recommended size. Select Custom to use the width and height below",
),
comfy_io.Int.Input(
IO.Int.Input(
"width",
default=1024,
min=512,
@ -271,7 +271,7 @@ class ByteDanceImageNode(comfy_io.ComfyNode):
step=64,
tooltip="Custom width for image. Value is working only if `size_preset` is set to `Custom`",
),
comfy_io.Int.Input(
IO.Int.Input(
"height",
default=1024,
min=512,
@ -279,28 +279,28 @@ class ByteDanceImageNode(comfy_io.ComfyNode):
step=64,
tooltip="Custom height for image. Value is working only if `size_preset` is set to `Custom`",
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="Seed to use for generation",
optional=True,
),
comfy_io.Float.Input(
IO.Float.Input(
"guidance_scale",
default=2.5,
min=1.0,
max=10.0,
step=0.01,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
tooltip="Higher value makes the image follow the prompt more closely",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"watermark",
default=True,
tooltip="Whether to add an \"AI generated\" watermark to the image",
@ -308,12 +308,12 @@ class ByteDanceImageNode(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Image.Output(),
IO.Image.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -329,7 +329,7 @@ class ByteDanceImageNode(comfy_io.ComfyNode):
seed: int,
guidance_scale: float,
watermark: bool,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=True, min_length=1)
w = h = None
for label, tw, th in RECOMMENDED_PRESETS:
@ -367,57 +367,57 @@ class ByteDanceImageNode(comfy_io.ComfyNode):
request=payload,
auth_kwargs=auth_kwargs,
).execute()
return comfy_io.NodeOutput(await download_url_to_image_tensor(get_image_url_from_response(response)))
return IO.NodeOutput(await download_url_to_image_tensor(get_image_url_from_response(response)))
class ByteDanceImageEditNode(comfy_io.ComfyNode):
class ByteDanceImageEditNode(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="ByteDanceImageEditNode",
display_name="ByteDance Image Edit",
category="api node/image/ByteDance",
description="Edit images using ByteDance models via api based on prompt",
inputs=[
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=[model.value for model in Image2ImageModelName],
default=Image2ImageModelName.seededit_3.value,
options=Image2ImageModelName,
default=Image2ImageModelName.seededit_3,
tooltip="Model name",
),
comfy_io.Image.Input(
IO.Image.Input(
"image",
tooltip="The base image to edit",
),
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Instruction to edit image",
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="Seed to use for generation",
optional=True,
),
comfy_io.Float.Input(
IO.Float.Input(
"guidance_scale",
default=5.5,
min=1.0,
max=10.0,
step=0.01,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
tooltip="Higher value makes the image follow the prompt more closely",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"watermark",
default=True,
tooltip="Whether to add an \"AI generated\" watermark to the image",
@ -425,12 +425,12 @@ class ByteDanceImageEditNode(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Image.Output(),
IO.Image.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -444,7 +444,7 @@ class ByteDanceImageEditNode(comfy_io.ComfyNode):
seed: int,
guidance_scale: float,
watermark: bool,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=True, min_length=1)
if get_number_of_images(image) != 1:
raise ValueError("Exactly one input image is required.")
@ -477,42 +477,42 @@ class ByteDanceImageEditNode(comfy_io.ComfyNode):
request=payload,
auth_kwargs=auth_kwargs,
).execute()
return comfy_io.NodeOutput(await download_url_to_image_tensor(get_image_url_from_response(response)))
return IO.NodeOutput(await download_url_to_image_tensor(get_image_url_from_response(response)))
class ByteDanceSeedreamNode(comfy_io.ComfyNode):
class ByteDanceSeedreamNode(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="ByteDanceSeedreamNode",
display_name="ByteDance Seedream 4",
category="api node/image/ByteDance",
description="Unified text-to-image generation and precise single-sentence editing at up to 4K resolution.",
inputs=[
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=["seedream-4-0-250828"],
tooltip="Model name",
),
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Text prompt for creating or editing an image.",
),
comfy_io.Image.Input(
IO.Image.Input(
"image",
tooltip="Input image(s) for image-to-image generation. "
"List of 1-10 images for single or multi-reference generation.",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"size_preset",
options=[label for label, _, _ in RECOMMENDED_PRESETS_SEEDREAM_4],
tooltip="Pick a recommended size. Select Custom to use the width and height below.",
),
comfy_io.Int.Input(
IO.Int.Input(
"width",
default=2048,
min=1024,
@ -521,7 +521,7 @@ class ByteDanceSeedreamNode(comfy_io.ComfyNode):
tooltip="Custom width for image. Value is working only if `size_preset` is set to `Custom`",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"height",
default=2048,
min=1024,
@ -530,7 +530,7 @@ class ByteDanceSeedreamNode(comfy_io.ComfyNode):
tooltip="Custom height for image. Value is working only if `size_preset` is set to `Custom`",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"sequential_image_generation",
options=["disabled", "auto"],
tooltip="Group image generation mode. "
@ -539,35 +539,35 @@ class ByteDanceSeedreamNode(comfy_io.ComfyNode):
"(e.g., story scenes, character variations).",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"max_images",
default=1,
min=1,
max=15,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
tooltip="Maximum number of images to generate when sequential_image_generation='auto'. "
"Total images (input + generated) cannot exceed 15.",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="Seed to use for generation.",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"watermark",
default=True,
tooltip="Whether to add an \"AI generated\" watermark to the image.",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"fail_on_partial",
default=True,
tooltip="If enabled, abort execution if any requested images are missing or return an error.",
@ -575,12 +575,12 @@ class ByteDanceSeedreamNode(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Image.Output(),
IO.Image.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -599,7 +599,7 @@ class ByteDanceSeedreamNode(comfy_io.ComfyNode):
seed: int = 0,
watermark: bool = True,
fail_on_partial: bool = True,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=True, min_length=1)
w = h = None
for label, tw, th in RECOMMENDED_PRESETS_SEEDREAM_4:
@ -657,72 +657,72 @@ class ByteDanceSeedreamNode(comfy_io.ComfyNode):
).execute()
if len(response.data) == 1:
return comfy_io.NodeOutput(await download_url_to_image_tensor(get_image_url_from_response(response)))
return IO.NodeOutput(await download_url_to_image_tensor(get_image_url_from_response(response)))
urls = [str(d["url"]) for d in response.data if isinstance(d, dict) and "url" in d]
if fail_on_partial and len(urls) < len(response.data):
raise RuntimeError(f"Only {len(urls)} of {len(response.data)} images were generated before error.")
return comfy_io.NodeOutput(torch.cat([await download_url_to_image_tensor(i) for i in urls]))
return IO.NodeOutput(torch.cat([await download_url_to_image_tensor(i) for i in urls]))
class ByteDanceTextToVideoNode(comfy_io.ComfyNode):
class ByteDanceTextToVideoNode(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="ByteDanceTextToVideoNode",
display_name="ByteDance Text to Video",
category="api node/video/ByteDance",
description="Generate video using ByteDance models via api based on prompt",
inputs=[
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=[model.value for model in Text2VideoModelName],
default=Text2VideoModelName.seedance_1_pro.value,
options=Text2VideoModelName,
default=Text2VideoModelName.seedance_1_pro,
tooltip="Model name",
),
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
tooltip="The text prompt used to generate the video.",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"resolution",
options=["480p", "720p", "1080p"],
tooltip="The resolution of the output video.",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"aspect_ratio",
options=["16:9", "4:3", "1:1", "3:4", "9:16", "21:9"],
tooltip="The aspect ratio of the output video.",
),
comfy_io.Int.Input(
IO.Int.Input(
"duration",
default=5,
min=3,
max=12,
step=1,
tooltip="The duration of the output video in seconds.",
display_mode=comfy_io.NumberDisplay.slider,
display_mode=IO.NumberDisplay.slider,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="Seed to use for generation.",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"camera_fixed",
default=False,
tooltip="Specifies whether to fix the camera. The platform appends an instruction "
"to fix the camera to your prompt, but does not guarantee the actual effect.",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"watermark",
default=True,
tooltip="Whether to add an \"AI generated\" watermark to the video.",
@ -730,12 +730,12 @@ class ByteDanceTextToVideoNode(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Video.Output(),
IO.Video.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -751,7 +751,7 @@ class ByteDanceTextToVideoNode(comfy_io.ComfyNode):
seed: int,
camera_fixed: bool,
watermark: bool,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=True, min_length=1)
raise_if_text_params(prompt, ["resolution", "ratio", "duration", "seed", "camerafixed", "watermark"])
@ -781,69 +781,69 @@ class ByteDanceTextToVideoNode(comfy_io.ComfyNode):
)
class ByteDanceImageToVideoNode(comfy_io.ComfyNode):
class ByteDanceImageToVideoNode(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="ByteDanceImageToVideoNode",
display_name="ByteDance Image to Video",
category="api node/video/ByteDance",
description="Generate video using ByteDance models via api based on image and prompt",
inputs=[
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=[model.value for model in Image2VideoModelName],
default=Image2VideoModelName.seedance_1_pro.value,
options=Image2VideoModelName,
default=Image2VideoModelName.seedance_1_pro,
tooltip="Model name",
),
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
tooltip="The text prompt used to generate the video.",
),
comfy_io.Image.Input(
IO.Image.Input(
"image",
tooltip="First frame to be used for the video.",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"resolution",
options=["480p", "720p", "1080p"],
tooltip="The resolution of the output video.",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"aspect_ratio",
options=["adaptive", "16:9", "4:3", "1:1", "3:4", "9:16", "21:9"],
tooltip="The aspect ratio of the output video.",
),
comfy_io.Int.Input(
IO.Int.Input(
"duration",
default=5,
min=3,
max=12,
step=1,
tooltip="The duration of the output video in seconds.",
display_mode=comfy_io.NumberDisplay.slider,
display_mode=IO.NumberDisplay.slider,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="Seed to use for generation.",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"camera_fixed",
default=False,
tooltip="Specifies whether to fix the camera. The platform appends an instruction "
"to fix the camera to your prompt, but does not guarantee the actual effect.",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"watermark",
default=True,
tooltip="Whether to add an \"AI generated\" watermark to the video.",
@ -851,12 +851,12 @@ class ByteDanceImageToVideoNode(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Video.Output(),
IO.Video.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -873,7 +873,7 @@ class ByteDanceImageToVideoNode(comfy_io.ComfyNode):
seed: int,
camera_fixed: bool,
watermark: bool,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=True, min_length=1)
raise_if_text_params(prompt, ["resolution", "ratio", "duration", "seed", "camerafixed", "watermark"])
validate_image_dimensions(image, min_width=300, min_height=300, max_width=6000, max_height=6000)
@ -908,73 +908,73 @@ class ByteDanceImageToVideoNode(comfy_io.ComfyNode):
)
class ByteDanceFirstLastFrameNode(comfy_io.ComfyNode):
class ByteDanceFirstLastFrameNode(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="ByteDanceFirstLastFrameNode",
display_name="ByteDance First-Last-Frame to Video",
category="api node/video/ByteDance",
description="Generate video using prompt and first and last frames.",
inputs=[
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=[Image2VideoModelName.seedance_1_lite.value],
options=[model.value for model in Image2VideoModelName],
default=Image2VideoModelName.seedance_1_lite.value,
tooltip="Model name",
),
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
tooltip="The text prompt used to generate the video.",
),
comfy_io.Image.Input(
IO.Image.Input(
"first_frame",
tooltip="First frame to be used for the video.",
),
comfy_io.Image.Input(
IO.Image.Input(
"last_frame",
tooltip="Last frame to be used for the video.",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"resolution",
options=["480p", "720p", "1080p"],
tooltip="The resolution of the output video.",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"aspect_ratio",
options=["adaptive", "16:9", "4:3", "1:1", "3:4", "9:16", "21:9"],
tooltip="The aspect ratio of the output video.",
),
comfy_io.Int.Input(
IO.Int.Input(
"duration",
default=5,
min=3,
max=12,
step=1,
tooltip="The duration of the output video in seconds.",
display_mode=comfy_io.NumberDisplay.slider,
display_mode=IO.NumberDisplay.slider,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="Seed to use for generation.",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"camera_fixed",
default=False,
tooltip="Specifies whether to fix the camera. The platform appends an instruction "
"to fix the camera to your prompt, but does not guarantee the actual effect.",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"watermark",
default=True,
tooltip="Whether to add an \"AI generated\" watermark to the video.",
@ -982,12 +982,12 @@ class ByteDanceFirstLastFrameNode(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Video.Output(),
IO.Video.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -1005,7 +1005,7 @@ class ByteDanceFirstLastFrameNode(comfy_io.ComfyNode):
seed: int,
camera_fixed: bool,
watermark: bool,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=True, min_length=1)
raise_if_text_params(prompt, ["resolution", "ratio", "duration", "seed", "camerafixed", "watermark"])
for i in (first_frame, last_frame):
@ -1050,62 +1050,62 @@ class ByteDanceFirstLastFrameNode(comfy_io.ComfyNode):
)
class ByteDanceImageReferenceNode(comfy_io.ComfyNode):
class ByteDanceImageReferenceNode(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="ByteDanceImageReferenceNode",
display_name="ByteDance Reference Images to Video",
category="api node/video/ByteDance",
description="Generate video using prompt and reference images.",
inputs=[
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=[Image2VideoModelName.seedance_1_lite.value],
default=Image2VideoModelName.seedance_1_lite.value,
tooltip="Model name",
),
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
tooltip="The text prompt used to generate the video.",
),
comfy_io.Image.Input(
IO.Image.Input(
"images",
tooltip="One to four images.",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"resolution",
options=["480p", "720p"],
tooltip="The resolution of the output video.",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"aspect_ratio",
options=["adaptive", "16:9", "4:3", "1:1", "3:4", "9:16", "21:9"],
tooltip="The aspect ratio of the output video.",
),
comfy_io.Int.Input(
IO.Int.Input(
"duration",
default=5,
min=3,
max=12,
step=1,
tooltip="The duration of the output video in seconds.",
display_mode=comfy_io.NumberDisplay.slider,
display_mode=IO.NumberDisplay.slider,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="Seed to use for generation.",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"watermark",
default=True,
tooltip="Whether to add an \"AI generated\" watermark to the video.",
@ -1113,12 +1113,12 @@ class ByteDanceImageReferenceNode(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Video.Output(),
IO.Video.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -1134,7 +1134,7 @@ class ByteDanceImageReferenceNode(comfy_io.ComfyNode):
duration: int,
seed: int,
watermark: bool,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=True, min_length=1)
raise_if_text_params(prompt, ["resolution", "ratio", "duration", "seed", "watermark"])
for image in images:
@ -1180,7 +1180,7 @@ async def process_video_task(
auth_kwargs: dict,
node_id: str,
estimated_duration: Optional[int],
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
initial_response = await SynchronousOperation(
endpoint=ApiEndpoint(
path=BYTEPLUS_TASK_ENDPOINT,
@ -1197,7 +1197,7 @@ async def process_video_task(
estimated_duration=estimated_duration,
node_id=node_id,
)
return comfy_io.NodeOutput(await download_url_to_video_output(get_video_url_from_task_status(response)))
return IO.NodeOutput(await download_url_to_video_output(get_video_url_from_task_status(response)))
def raise_if_text_params(prompt: str, text_params: list[str]) -> None:
@ -1210,7 +1210,7 @@ def raise_if_text_params(prompt: str, text_params: list[str]) -> None:
class ByteDanceExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[comfy_io.ComfyNode]]:
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
return [
ByteDanceImageNode,
ByteDanceImageEditNode,

View File

@ -26,7 +26,7 @@ from comfy_api_nodes.apis import (
GeminiPart,
GeminiMimeType,
)
from comfy_api_nodes.apis.gemini_api import GeminiImageGenerationConfig, GeminiImageGenerateContentRequest
from comfy_api_nodes.apis.gemini_api import GeminiImageGenerationConfig, GeminiImageGenerateContentRequest, GeminiImageConfig
from comfy_api_nodes.apis.client import (
ApiEndpoint,
HttpMethod,
@ -39,6 +39,7 @@ from comfy_api_nodes.apinode_utils import (
tensor_to_base64_string,
bytesio_to_image_tensor,
)
from comfy_api.util import VideoContainer, VideoCodec
GEMINI_BASE_ENDPOINT = "/proxy/vertexai/gemini"
@ -62,6 +63,7 @@ class GeminiImageModel(str, Enum):
"""
gemini_2_5_flash_image_preview = "gemini-2.5-flash-image-preview"
gemini_2_5_flash_image = "gemini-2.5-flash-image"
def get_gemini_endpoint(
@ -310,7 +312,7 @@ class GeminiNode(ComfyNodeABC):
Returns:
List of GeminiPart objects containing the encoded video.
"""
from comfy_api.util import VideoContainer, VideoCodec
base_64_string = video_to_base64_string(
video_input,
container_format=VideoContainer.MP4,
@ -490,7 +492,6 @@ class GeminiInputFiles(ComfyNodeABC):
# Use base64 string directly, not the data URI
with open(file_path, "rb") as f:
file_content = f.read()
import base64
base64_str = base64.b64encode(file_content).decode("utf-8")
return GeminiPart(
@ -538,7 +539,7 @@ class GeminiImage(ComfyNodeABC):
{
"tooltip": "The Gemini model to use for generating responses.",
"options": [model.value for model in GeminiImageModel],
"default": GeminiImageModel.gemini_2_5_flash_image_preview.value,
"default": GeminiImageModel.gemini_2_5_flash_image.value,
},
),
"seed": (
@ -579,6 +580,14 @@ class GeminiImage(ComfyNodeABC):
# "tooltip": "How many images to generate",
# },
# ),
"aspect_ratio": (
IO.COMBO,
{
"tooltip": "Defaults to matching the output image size to that of your input image, or otherwise generates 1:1 squares.",
"options": ["auto", "1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9", "21:9"],
"default": "auto",
},
),
},
"hidden": {
"auth_token": "AUTH_TOKEN_COMFY_ORG",
@ -600,15 +609,17 @@ class GeminiImage(ComfyNodeABC):
images: Optional[IO.IMAGE] = None,
files: Optional[list[GeminiPart]] = None,
n=1,
aspect_ratio: str = "auto",
unique_id: Optional[str] = None,
**kwargs,
):
# Validate inputs
validate_string(prompt, strip_whitespace=True, min_length=1)
# Create parts list with text prompt as the first part
parts: list[GeminiPart] = [create_text_part(prompt)]
# Add other modal parts
if not aspect_ratio:
aspect_ratio = "auto" # for backward compatability with old workflows; to-do remove this in December
image_config = GeminiImageConfig(aspectRatio=aspect_ratio)
if images is not None:
image_parts = create_image_parts(images)
parts.extend(image_parts)
@ -625,7 +636,8 @@ class GeminiImage(ComfyNodeABC):
),
],
generationConfig=GeminiImageGenerationConfig(
responseModalities=["TEXT","IMAGE"]
responseModalities=["TEXT","IMAGE"],
imageConfig=None if aspect_ratio == "auto" else image_config,
)
),
auth_kwargs=kwargs,
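Besides switching the default model to gemini-2.5-flash-image, the main addition in this Gemini file is the aspect_ratio input: it is wrapped in a GeminiImageConfig and attached to the generation config only when it is not "auto", so existing workflows that pass no value keep the previous server-side behaviour. Condensed from the lines above, with the rest of the request construction elided:

# condensed sketch of the branch above; not the complete request construction
aspect_ratio = aspect_ratio or "auto"  # older workflows may omit the input entirely
image_config = GeminiImageConfig(aspectRatio=aspect_ratio)

generation_config = GeminiImageGenerationConfig(
    responseModalities=["TEXT", "IMAGE"],
    # "auto" sends no imageConfig at all, preserving the pre-existing default
    imageConfig=None if aspect_ratio == "auto" else image_config,
)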
View File
@ -1,6 +1,6 @@
from io import BytesIO
from typing_extensions import override
from comfy_api.latest import ComfyExtension, io as comfy_io
from comfy_api.latest import ComfyExtension, IO
from PIL import Image
import numpy as np
import torch
@ -246,76 +246,76 @@ def display_image_urls_on_node(image_urls, node_id):
PromptServer.instance.send_progress_text(urls_text, node_id)
class IdeogramV1(comfy_io.ComfyNode):
class IdeogramV1(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="IdeogramV1",
display_name="Ideogram V1",
category="api node/image/Ideogram",
description="Generates images using the Ideogram V1 model.",
is_api_node=True,
inputs=[
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Prompt for the image generation",
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"turbo",
default=False,
tooltip="Whether to use turbo mode (faster generation, potentially lower quality)",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"aspect_ratio",
options=list(V1_V2_RATIO_MAP.keys()),
default="1:1",
tooltip="The aspect ratio for image generation.",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"magic_prompt_option",
options=["AUTO", "ON", "OFF"],
default="AUTO",
tooltip="Determine if MagicPrompt should be used in generation",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
control_after_generate=True,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
optional=True,
),
comfy_io.String.Input(
IO.String.Input(
"negative_prompt",
multiline=True,
default="",
tooltip="Description of what to exclude from the image",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"num_images",
default=1,
min=1,
max=8,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
optional=True,
),
],
outputs=[
comfy_io.Image.Output(),
IO.Image.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
)
@ -372,39 +372,39 @@ class IdeogramV1(comfy_io.ComfyNode):
raise Exception("No image URLs were generated in the response")
display_image_urls_on_node(image_urls, cls.hidden.unique_id)
return comfy_io.NodeOutput(await download_and_process_images(image_urls))
return IO.NodeOutput(await download_and_process_images(image_urls))
class IdeogramV2(comfy_io.ComfyNode):
class IdeogramV2(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="IdeogramV2",
display_name="Ideogram V2",
category="api node/image/Ideogram",
description="Generates images using the Ideogram V2 model.",
is_api_node=True,
inputs=[
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Prompt for the image generation",
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"turbo",
default=False,
tooltip="Whether to use turbo mode (faster generation, potentially lower quality)",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"aspect_ratio",
options=list(V1_V2_RATIO_MAP.keys()),
default="1:1",
tooltip="The aspect ratio for image generation. Ignored if resolution is not set to AUTO.",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"resolution",
options=list(V1_V1_RES_MAP.keys()),
default="Auto",
@ -412,44 +412,44 @@ class IdeogramV2(comfy_io.ComfyNode):
"If not set to AUTO, this overrides the aspect_ratio setting.",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"magic_prompt_option",
options=["AUTO", "ON", "OFF"],
default="AUTO",
tooltip="Determine if MagicPrompt should be used in generation",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
control_after_generate=True,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"style_type",
options=["AUTO", "GENERAL", "REALISTIC", "DESIGN", "RENDER_3D", "ANIME"],
default="NONE",
tooltip="Style type for generation (V2 only)",
optional=True,
),
comfy_io.String.Input(
IO.String.Input(
"negative_prompt",
multiline=True,
default="",
tooltip="Description of what to exclude from the image",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"num_images",
default=1,
min=1,
max=8,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
optional=True,
),
#"color_palette": (
@ -462,12 +462,12 @@ class IdeogramV2(comfy_io.ComfyNode):
#),
],
outputs=[
comfy_io.Image.Output(),
IO.Image.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
)
@ -541,14 +541,14 @@ class IdeogramV2(comfy_io.ComfyNode):
raise Exception("No image URLs were generated in the response")
display_image_urls_on_node(image_urls, cls.hidden.unique_id)
return comfy_io.NodeOutput(await download_and_process_images(image_urls))
return IO.NodeOutput(await download_and_process_images(image_urls))
class IdeogramV3(comfy_io.ComfyNode):
class IdeogramV3(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="IdeogramV3",
display_name="Ideogram V3",
category="api node/image/Ideogram",
@ -556,30 +556,30 @@ class IdeogramV3(comfy_io.ComfyNode):
"Supports both regular image generation from text prompts and image editing with mask.",
is_api_node=True,
inputs=[
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Prompt for the image generation or editing",
),
comfy_io.Image.Input(
IO.Image.Input(
"image",
tooltip="Optional reference image for image editing.",
optional=True,
),
comfy_io.Mask.Input(
IO.Mask.Input(
"mask",
tooltip="Optional mask for inpainting (white areas will be replaced)",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"aspect_ratio",
options=list(V3_RATIO_MAP.keys()),
default="1:1",
tooltip="The aspect ratio for image generation. Ignored if resolution is not set to Auto.",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"resolution",
options=V3_RESOLUTIONS,
default="Auto",
@ -587,57 +587,57 @@ class IdeogramV3(comfy_io.ComfyNode):
"If not set to Auto, this overrides the aspect_ratio setting.",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"magic_prompt_option",
options=["AUTO", "ON", "OFF"],
default="AUTO",
tooltip="Determine if MagicPrompt should be used in generation",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
control_after_generate=True,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"num_images",
default=1,
min=1,
max=8,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"rendering_speed",
options=["DEFAULT", "TURBO", "QUALITY"],
default="DEFAULT",
tooltip="Controls the trade-off between generation speed and quality",
optional=True,
),
comfy_io.Image.Input(
IO.Image.Input(
"character_image",
tooltip="Image to use as character reference.",
optional=True,
),
comfy_io.Mask.Input(
IO.Mask.Input(
"character_mask",
tooltip="Optional mask for character reference image.",
optional=True,
),
],
outputs=[
comfy_io.Image.Output(),
IO.Image.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
)
@ -826,12 +826,12 @@ class IdeogramV3(comfy_io.ComfyNode):
raise Exception("No image URLs were generated in the response")
display_image_urls_on_node(image_urls, cls.hidden.unique_id)
return comfy_io.NodeOutput(await download_and_process_images(image_urls))
return IO.NodeOutput(await download_and_process_images(image_urls))
class IdeogramExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[comfy_io.ComfyNode]]:
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
return [
IdeogramV1,
IdeogramV2,
File diff suppressed because it is too large

View File
@ -1,7 +1,8 @@
from __future__ import annotations
from inspect import cleandoc
from typing import Optional
from comfy.comfy_types.node_typing import IO, ComfyNodeABC
from typing_extensions import override
from comfy_api.latest import ComfyExtension, IO
from comfy_api.input_impl.video_types import VideoFromFile
from comfy_api_nodes.apis.luma_api import (
LumaImageModel,
@ -51,174 +52,186 @@ def image_result_url_extractor(response: LumaGeneration):
def video_result_url_extractor(response: LumaGeneration):
return response.assets.video if hasattr(response, "assets") and hasattr(response.assets, "video") else None
class LumaReferenceNode(ComfyNodeABC):
class LumaReferenceNode(IO.ComfyNode):
"""
Holds an image and weight for use with Luma Generate Image node.
"""
RETURN_TYPES = (LumaIO.LUMA_REF,)
RETURN_NAMES = ("luma_ref",)
DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value
FUNCTION = "create_luma_reference"
CATEGORY = "api node/image/Luma"
@classmethod
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="LumaReferenceNode",
display_name="Luma Reference",
category="api node/image/Luma",
description=cleandoc(cls.__doc__ or ""),
inputs=[
IO.Image.Input(
"image",
tooltip="Image to use as reference.",
),
IO.Float.Input(
"weight",
default=1.0,
min=0.0,
max=1.0,
step=0.01,
tooltip="Weight of image reference.",
),
IO.Custom(LumaIO.LUMA_REF).Input(
"luma_ref",
optional=True,
),
],
outputs=[IO.Custom(LumaIO.LUMA_REF).Output(display_name="luma_ref")],
hidden=[
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
)
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"image": (
IO.IMAGE,
{
"tooltip": "Image to use as reference.",
},
),
"weight": (
IO.FLOAT,
{
"default": 1.0,
"min": 0.0,
"max": 1.0,
"step": 0.01,
"tooltip": "Weight of image reference.",
},
),
},
"optional": {"luma_ref": (LumaIO.LUMA_REF,)},
}
def create_luma_reference(
self, image: torch.Tensor, weight: float, luma_ref: LumaReferenceChain = None
):
def execute(
cls, image: torch.Tensor, weight: float, luma_ref: LumaReferenceChain = None
) -> IO.NodeOutput:
if luma_ref is not None:
luma_ref = luma_ref.clone()
else:
luma_ref = LumaReferenceChain()
luma_ref.add(LumaReference(image=image, weight=round(weight, 2)))
return (luma_ref,)
return IO.NodeOutput(luma_ref)
class LumaConceptsNode(ComfyNodeABC):
class LumaConceptsNode(IO.ComfyNode):
"""
Holds one or more Camera Concepts for use with Luma Text to Video and Luma Image to Video nodes.
"""
RETURN_TYPES = (LumaIO.LUMA_CONCEPTS,)
RETURN_NAMES = ("luma_concepts",)
DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value
FUNCTION = "create_concepts"
CATEGORY = "api node/video/Luma"
@classmethod
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="LumaConceptsNode",
display_name="Luma Concepts",
category="api node/video/Luma",
description=cleandoc(cls.__doc__ or ""),
inputs=[
IO.Combo.Input(
"concept1",
options=get_luma_concepts(include_none=True),
),
IO.Combo.Input(
"concept2",
options=get_luma_concepts(include_none=True),
),
IO.Combo.Input(
"concept3",
options=get_luma_concepts(include_none=True),
),
IO.Combo.Input(
"concept4",
options=get_luma_concepts(include_none=True),
),
IO.Custom(LumaIO.LUMA_CONCEPTS).Input(
"luma_concepts",
tooltip="Optional Camera Concepts to add to the ones chosen here.",
optional=True,
),
],
outputs=[IO.Custom(LumaIO.LUMA_CONCEPTS).Output(display_name="luma_concepts")],
hidden=[
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
)
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"concept1": (get_luma_concepts(include_none=True),),
"concept2": (get_luma_concepts(include_none=True),),
"concept3": (get_luma_concepts(include_none=True),),
"concept4": (get_luma_concepts(include_none=True),),
},
"optional": {
"luma_concepts": (
LumaIO.LUMA_CONCEPTS,
{
"tooltip": "Optional Camera Concepts to add to the ones chosen here."
},
),
},
}
def create_concepts(
self,
def execute(
cls,
concept1: str,
concept2: str,
concept3: str,
concept4: str,
luma_concepts: LumaConceptChain = None,
):
) -> IO.NodeOutput:
chain = LumaConceptChain(str_list=[concept1, concept2, concept3, concept4])
if luma_concepts is not None:
chain = luma_concepts.clone_and_merge(chain)
return (chain,)
return IO.NodeOutput(chain)
class LumaImageGenerationNode(ComfyNodeABC):
class LumaImageGenerationNode(IO.ComfyNode):
"""
Generates images synchronously based on prompt and aspect ratio.
"""
RETURN_TYPES = (IO.IMAGE,)
DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value
FUNCTION = "api_call"
API_NODE = True
CATEGORY = "api node/image/Luma"
@classmethod
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="LumaImageNode",
display_name="Luma Text to Image",
category="api node/image/Luma",
description=cleandoc(cls.__doc__ or ""),
inputs=[
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Prompt for the image generation",
),
IO.Combo.Input(
"model",
options=LumaImageModel,
),
IO.Combo.Input(
"aspect_ratio",
options=LumaAspectRatio,
default=LumaAspectRatio.ratio_16_9,
),
IO.Int.Input(
"seed",
default=0,
min=0,
max=0xFFFFFFFFFFFFFFFF,
control_after_generate=True,
tooltip="Seed to determine if node should re-run; actual results are nondeterministic regardless of seed.",
),
IO.Float.Input(
"style_image_weight",
default=1.0,
min=0.0,
max=1.0,
step=0.01,
tooltip="Weight of style image. Ignored if no style_image provided.",
),
IO.Custom(LumaIO.LUMA_REF).Input(
"image_luma_ref",
tooltip="Luma Reference node connection to influence generation with input images; up to 4 images can be considered.",
optional=True,
),
IO.Image.Input(
"style_image",
tooltip="Style reference image; only 1 image will be used.",
optional=True,
),
IO.Image.Input(
"character_image",
tooltip="Character reference images; can be a batch of multiple, up to 4 images can be considered.",
optional=True,
),
],
outputs=[IO.Image.Output()],
hidden=[
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"prompt": (
IO.STRING,
{
"multiline": True,
"default": "",
"tooltip": "Prompt for the image generation",
},
),
"model": ([model.value for model in LumaImageModel],),
"aspect_ratio": (
[ratio.value for ratio in LumaAspectRatio],
{
"default": LumaAspectRatio.ratio_16_9,
},
),
"seed": (
IO.INT,
{
"default": 0,
"min": 0,
"max": 0xFFFFFFFFFFFFFFFF,
"control_after_generate": True,
"tooltip": "Seed to determine if node should re-run; actual results are nondeterministic regardless of seed.",
},
),
"style_image_weight": (
IO.FLOAT,
{
"default": 1.0,
"min": 0.0,
"max": 1.0,
"step": 0.01,
"tooltip": "Weight of style image. Ignored if no style_image provided.",
},
),
},
"optional": {
"image_luma_ref": (
LumaIO.LUMA_REF,
{
"tooltip": "Luma Reference node connection to influence generation with input images; up to 4 images can be considered."
},
),
"style_image": (
IO.IMAGE,
{"tooltip": "Style reference image; only 1 image will be used."},
),
"character_image": (
IO.IMAGE,
{
"tooltip": "Character reference images; can be a batch of multiple, up to 4 images can be considered."
},
),
},
"hidden": {
"auth_token": "AUTH_TOKEN_COMFY_ORG",
"comfy_api_key": "API_KEY_COMFY_ORG",
"unique_id": "UNIQUE_ID",
},
}
async def api_call(
self,
async def execute(
cls,
prompt: str,
model: str,
aspect_ratio: str,
@ -227,27 +240,29 @@ class LumaImageGenerationNode(ComfyNodeABC):
image_luma_ref: LumaReferenceChain = None,
style_image: torch.Tensor = None,
character_image: torch.Tensor = None,
unique_id: str = None,
**kwargs,
):
) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=True, min_length=3)
auth_kwargs = {
"auth_token": cls.hidden.auth_token_comfy_org,
"comfy_api_key": cls.hidden.api_key_comfy_org,
}
# handle image_luma_ref
api_image_ref = None
if image_luma_ref is not None:
api_image_ref = await self._convert_luma_refs(
image_luma_ref, max_refs=4, auth_kwargs=kwargs,
api_image_ref = await cls._convert_luma_refs(
image_luma_ref, max_refs=4, auth_kwargs=auth_kwargs,
)
# handle style_luma_ref
api_style_ref = None
if style_image is not None:
api_style_ref = await self._convert_style_image(
style_image, weight=style_image_weight, auth_kwargs=kwargs,
api_style_ref = await cls._convert_style_image(
style_image, weight=style_image_weight, auth_kwargs=auth_kwargs,
)
# handle character_ref images
character_ref = None
if character_image is not None:
download_urls = await upload_images_to_comfyapi(
character_image, max_images=4, auth_kwargs=kwargs,
character_image, max_images=4, auth_kwargs=auth_kwargs,
)
character_ref = LumaCharacterRef(
identity0=LumaImageIdentity(images=download_urls)
@ -268,7 +283,7 @@ class LumaImageGenerationNode(ComfyNodeABC):
style_ref=api_style_ref,
character_ref=character_ref,
),
auth_kwargs=kwargs,
auth_kwargs=auth_kwargs,
)
response_api: LumaGeneration = await operation.execute()
@ -283,18 +298,19 @@ class LumaImageGenerationNode(ComfyNodeABC):
failed_statuses=[LumaState.failed],
status_extractor=lambda x: x.state,
result_url_extractor=image_result_url_extractor,
node_id=unique_id,
auth_kwargs=kwargs,
node_id=cls.hidden.unique_id,
auth_kwargs=auth_kwargs,
)
response_poll = await operation.execute()
async with aiohttp.ClientSession() as session:
async with session.get(response_poll.assets.image) as img_response:
img = process_image_response(await img_response.content.read())
return (img,)
return IO.NodeOutput(img)
@classmethod
async def _convert_luma_refs(
self, luma_ref: LumaReferenceChain, max_refs: int, auth_kwargs: Optional[dict[str,str]] = None
cls, luma_ref: LumaReferenceChain, max_refs: int, auth_kwargs: Optional[dict[str,str]] = None
):
luma_urls = []
ref_count = 0
@ -308,82 +324,84 @@ class LumaImageGenerationNode(ComfyNodeABC):
break
return luma_ref.create_api_model(download_urls=luma_urls, max_refs=max_refs)
@classmethod
async def _convert_style_image(
self, style_image: torch.Tensor, weight: float, auth_kwargs: Optional[dict[str,str]] = None
cls, style_image: torch.Tensor, weight: float, auth_kwargs: Optional[dict[str,str]] = None
):
chain = LumaReferenceChain(
first_ref=LumaReference(image=style_image, weight=weight)
)
return await self._convert_luma_refs(chain, max_refs=1, auth_kwargs=auth_kwargs)
return await cls._convert_luma_refs(chain, max_refs=1, auth_kwargs=auth_kwargs)
class LumaImageModifyNode(ComfyNodeABC):
class LumaImageModifyNode(IO.ComfyNode):
"""
Modifies images synchronously based on prompt and aspect ratio.
"""
RETURN_TYPES = (IO.IMAGE,)
DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value
FUNCTION = "api_call"
API_NODE = True
CATEGORY = "api node/image/Luma"
@classmethod
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="LumaImageModifyNode",
display_name="Luma Image to Image",
category="api node/image/Luma",
description=cleandoc(cls.__doc__ or ""),
inputs=[
IO.Image.Input(
"image",
),
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Prompt for the image generation",
),
IO.Float.Input(
"image_weight",
default=0.1,
min=0.0,
max=0.98,
step=0.01,
tooltip="Weight of the image; the closer to 1.0, the less the image will be modified.",
),
IO.Combo.Input(
"model",
options=LumaImageModel,
),
IO.Int.Input(
"seed",
default=0,
min=0,
max=0xFFFFFFFFFFFFFFFF,
control_after_generate=True,
tooltip="Seed to determine if node should re-run; actual results are nondeterministic regardless of seed.",
),
],
outputs=[IO.Image.Output()],
hidden=[
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"image": (IO.IMAGE,),
"prompt": (
IO.STRING,
{
"multiline": True,
"default": "",
"tooltip": "Prompt for the image generation",
},
),
"image_weight": (
IO.FLOAT,
{
"default": 0.1,
"min": 0.0,
"max": 0.98,
"step": 0.01,
"tooltip": "Weight of the image; the closer to 1.0, the less the image will be modified.",
},
),
"model": ([model.value for model in LumaImageModel],),
"seed": (
IO.INT,
{
"default": 0,
"min": 0,
"max": 0xFFFFFFFFFFFFFFFF,
"control_after_generate": True,
"tooltip": "Seed to determine if node should re-run; actual results are nondeterministic regardless of seed.",
},
),
},
"optional": {},
"hidden": {
"auth_token": "AUTH_TOKEN_COMFY_ORG",
"comfy_api_key": "API_KEY_COMFY_ORG",
"unique_id": "UNIQUE_ID",
},
}
async def api_call(
self,
async def execute(
cls,
prompt: str,
model: str,
image: torch.Tensor,
image_weight: float,
seed,
unique_id: str = None,
**kwargs,
):
) -> IO.NodeOutput:
auth_kwargs = {
"auth_token": cls.hidden.auth_token_comfy_org,
"comfy_api_key": cls.hidden.api_key_comfy_org,
}
# first, upload image
download_urls = await upload_images_to_comfyapi(
image, max_images=1, auth_kwargs=kwargs,
image, max_images=1, auth_kwargs=auth_kwargs,
)
image_url = download_urls[0]
# next, make Luma call with download url provided
@ -401,7 +419,7 @@ class LumaImageModifyNode(ComfyNodeABC):
url=image_url, weight=round(max(min(1.0-image_weight, 0.98), 0.0), 2)
),
),
auth_kwargs=kwargs,
auth_kwargs=auth_kwargs,
)
response_api: LumaGeneration = await operation.execute()
@ -416,88 +434,84 @@ class LumaImageModifyNode(ComfyNodeABC):
failed_statuses=[LumaState.failed],
status_extractor=lambda x: x.state,
result_url_extractor=image_result_url_extractor,
node_id=unique_id,
auth_kwargs=kwargs,
node_id=cls.hidden.unique_id,
auth_kwargs=auth_kwargs,
)
response_poll = await operation.execute()
async with aiohttp.ClientSession() as session:
async with session.get(response_poll.assets.image) as img_response:
img = process_image_response(await img_response.content.read())
return (img,)
return IO.NodeOutput(img)
class LumaTextToVideoGenerationNode(ComfyNodeABC):
class LumaTextToVideoGenerationNode(IO.ComfyNode):
"""
Generates videos synchronously based on prompt and output_size.
"""
RETURN_TYPES = (IO.VIDEO,)
DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value
FUNCTION = "api_call"
API_NODE = True
CATEGORY = "api node/video/Luma"
@classmethod
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="LumaVideoNode",
display_name="Luma Text to Video",
category="api node/video/Luma",
description=cleandoc(cls.__doc__ or ""),
inputs=[
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Prompt for the video generation",
),
IO.Combo.Input(
"model",
options=LumaVideoModel,
),
IO.Combo.Input(
"aspect_ratio",
options=LumaAspectRatio,
default=LumaAspectRatio.ratio_16_9,
),
IO.Combo.Input(
"resolution",
options=LumaVideoOutputResolution,
default=LumaVideoOutputResolution.res_540p,
),
IO.Combo.Input(
"duration",
options=LumaVideoModelOutputDuration,
),
IO.Boolean.Input(
"loop",
default=False,
),
IO.Int.Input(
"seed",
default=0,
min=0,
max=0xFFFFFFFFFFFFFFFF,
control_after_generate=True,
tooltip="Seed to determine if node should re-run; actual results are nondeterministic regardless of seed.",
),
IO.Custom(LumaIO.LUMA_CONCEPTS).Input(
"luma_concepts",
tooltip="Optional Camera Concepts to dictate camera motion via the Luma Concepts node.",
optional=True,
)
],
outputs=[IO.Video.Output()],
hidden=[
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"prompt": (
IO.STRING,
{
"multiline": True,
"default": "",
"tooltip": "Prompt for the video generation",
},
),
"model": ([model.value for model in LumaVideoModel],),
"aspect_ratio": (
[ratio.value for ratio in LumaAspectRatio],
{
"default": LumaAspectRatio.ratio_16_9,
},
),
"resolution": (
[resolution.value for resolution in LumaVideoOutputResolution],
{
"default": LumaVideoOutputResolution.res_540p,
},
),
"duration": ([dur.value for dur in LumaVideoModelOutputDuration],),
"loop": (
IO.BOOLEAN,
{
"default": False,
},
),
"seed": (
IO.INT,
{
"default": 0,
"min": 0,
"max": 0xFFFFFFFFFFFFFFFF,
"control_after_generate": True,
"tooltip": "Seed to determine if node should re-run; actual results are nondeterministic regardless of seed.",
},
),
},
"optional": {
"luma_concepts": (
LumaIO.LUMA_CONCEPTS,
{
"tooltip": "Optional Camera Concepts to dictate camera motion via the Luma Concepts node."
},
),
},
"hidden": {
"auth_token": "AUTH_TOKEN_COMFY_ORG",
"comfy_api_key": "API_KEY_COMFY_ORG",
"unique_id": "UNIQUE_ID",
},
}
async def api_call(
self,
async def execute(
cls,
prompt: str,
model: str,
aspect_ratio: str,
@ -506,13 +520,15 @@ class LumaTextToVideoGenerationNode(ComfyNodeABC):
loop: bool,
seed,
luma_concepts: LumaConceptChain = None,
unique_id: str = None,
**kwargs,
):
) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=False, min_length=3)
duration = duration if model != LumaVideoModel.ray_1_6 else None
resolution = resolution if model != LumaVideoModel.ray_1_6 else None
auth_kwargs = {
"auth_token": cls.hidden.auth_token_comfy_org,
"comfy_api_key": cls.hidden.api_key_comfy_org,
}
operation = SynchronousOperation(
endpoint=ApiEndpoint(
path="/proxy/luma/generations",
@ -529,12 +545,12 @@ class LumaTextToVideoGenerationNode(ComfyNodeABC):
loop=loop,
concepts=luma_concepts.create_api_model() if luma_concepts else None,
),
auth_kwargs=kwargs,
auth_kwargs=auth_kwargs,
)
response_api: LumaGeneration = await operation.execute()
if unique_id:
PromptServer.instance.send_progress_text(f"Luma video generation started: {response_api.id}", unique_id)
if cls.hidden.unique_id:
PromptServer.instance.send_progress_text(f"Luma video generation started: {response_api.id}", cls.hidden.unique_id)
operation = PollingOperation(
poll_endpoint=ApiEndpoint(
@ -547,90 +563,94 @@ class LumaTextToVideoGenerationNode(ComfyNodeABC):
failed_statuses=[LumaState.failed],
status_extractor=lambda x: x.state,
result_url_extractor=video_result_url_extractor,
node_id=unique_id,
node_id=cls.hidden.unique_id,
estimated_duration=LUMA_T2V_AVERAGE_DURATION,
auth_kwargs=kwargs,
auth_kwargs=auth_kwargs,
)
response_poll = await operation.execute()
async with aiohttp.ClientSession() as session:
async with session.get(response_poll.assets.video) as vid_response:
return (VideoFromFile(BytesIO(await vid_response.content.read())),)
return IO.NodeOutput(VideoFromFile(BytesIO(await vid_response.content.read())))
class LumaImageToVideoGenerationNode(ComfyNodeABC):
class LumaImageToVideoGenerationNode(IO.ComfyNode):
"""
Generates videos synchronously based on prompt, input images, and output_size.
"""
RETURN_TYPES = (IO.VIDEO,)
DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value
FUNCTION = "api_call"
API_NODE = True
CATEGORY = "api node/video/Luma"
@classmethod
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="LumaImageToVideoNode",
display_name="Luma Image to Video",
category="api node/video/Luma",
description=cleandoc(cls.__doc__ or ""),
inputs=[
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Prompt for the video generation",
),
IO.Combo.Input(
"model",
options=LumaVideoModel,
),
# IO.Combo.Input(
# "aspect_ratio",
# options=[ratio.value for ratio in LumaAspectRatio],
# default=LumaAspectRatio.ratio_16_9,
# ),
IO.Combo.Input(
"resolution",
options=LumaVideoOutputResolution,
default=LumaVideoOutputResolution.res_540p,
),
IO.Combo.Input(
"duration",
options=[dur.value for dur in LumaVideoModelOutputDuration],
),
IO.Boolean.Input(
"loop",
default=False,
),
IO.Int.Input(
"seed",
default=0,
min=0,
max=0xFFFFFFFFFFFFFFFF,
control_after_generate=True,
tooltip="Seed to determine if node should re-run; actual results are nondeterministic regardless of seed.",
),
IO.Image.Input(
"first_image",
tooltip="First frame of generated video.",
optional=True,
),
IO.Image.Input(
"last_image",
tooltip="Last frame of generated video.",
optional=True,
),
IO.Custom(LumaIO.LUMA_CONCEPTS).Input(
"luma_concepts",
tooltip="Optional Camera Concepts to dictate camera motion via the Luma Concepts node.",
optional=True,
)
],
outputs=[IO.Video.Output()],
hidden=[
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"prompt": (
IO.STRING,
{
"multiline": True,
"default": "",
"tooltip": "Prompt for the video generation",
},
),
"model": ([model.value for model in LumaVideoModel],),
# "aspect_ratio": ([ratio.value for ratio in LumaAspectRatio], {
# "default": LumaAspectRatio.ratio_16_9,
# }),
"resolution": (
[resolution.value for resolution in LumaVideoOutputResolution],
{
"default": LumaVideoOutputResolution.res_540p,
},
),
"duration": ([dur.value for dur in LumaVideoModelOutputDuration],),
"loop": (
IO.BOOLEAN,
{
"default": False,
},
),
"seed": (
IO.INT,
{
"default": 0,
"min": 0,
"max": 0xFFFFFFFFFFFFFFFF,
"control_after_generate": True,
"tooltip": "Seed to determine if node should re-run; actual results are nondeterministic regardless of seed.",
},
),
},
"optional": {
"first_image": (
IO.IMAGE,
{"tooltip": "First frame of generated video."},
),
"last_image": (IO.IMAGE, {"tooltip": "Last frame of generated video."}),
"luma_concepts": (
LumaIO.LUMA_CONCEPTS,
{
"tooltip": "Optional Camera Concepts to dictate camera motion via the Luma Concepts node."
},
),
},
"hidden": {
"auth_token": "AUTH_TOKEN_COMFY_ORG",
"comfy_api_key": "API_KEY_COMFY_ORG",
"unique_id": "UNIQUE_ID",
},
}
async def api_call(
self,
async def execute(
cls,
prompt: str,
model: str,
resolution: str,
@ -640,14 +660,16 @@ class LumaImageToVideoGenerationNode(ComfyNodeABC):
first_image: torch.Tensor = None,
last_image: torch.Tensor = None,
luma_concepts: LumaConceptChain = None,
unique_id: str = None,
**kwargs,
):
) -> IO.NodeOutput:
if first_image is None and last_image is None:
raise Exception(
"At least one of first_image and last_image requires an input."
)
keyframes = await self._convert_to_keyframes(first_image, last_image, auth_kwargs=kwargs)
auth_kwargs = {
"auth_token": cls.hidden.auth_token_comfy_org,
"comfy_api_key": cls.hidden.api_key_comfy_org,
}
keyframes = await cls._convert_to_keyframes(first_image, last_image, auth_kwargs=auth_kwargs)
duration = duration if model != LumaVideoModel.ray_1_6 else None
resolution = resolution if model != LumaVideoModel.ray_1_6 else None
@ -668,12 +690,12 @@ class LumaImageToVideoGenerationNode(ComfyNodeABC):
keyframes=keyframes,
concepts=luma_concepts.create_api_model() if luma_concepts else None,
),
auth_kwargs=kwargs,
auth_kwargs=auth_kwargs,
)
response_api: LumaGeneration = await operation.execute()
if unique_id:
PromptServer.instance.send_progress_text(f"Luma video generation started: {response_api.id}", unique_id)
if cls.hidden.unique_id:
PromptServer.instance.send_progress_text(f"Luma video generation started: {response_api.id}", cls.hidden.unique_id)
operation = PollingOperation(
poll_endpoint=ApiEndpoint(
@ -686,18 +708,19 @@ class LumaImageToVideoGenerationNode(ComfyNodeABC):
failed_statuses=[LumaState.failed],
status_extractor=lambda x: x.state,
result_url_extractor=video_result_url_extractor,
node_id=unique_id,
node_id=cls.hidden.unique_id,
estimated_duration=LUMA_I2V_AVERAGE_DURATION,
auth_kwargs=kwargs,
auth_kwargs=auth_kwargs,
)
response_poll = await operation.execute()
async with aiohttp.ClientSession() as session:
async with session.get(response_poll.assets.video) as vid_response:
return (VideoFromFile(BytesIO(await vid_response.content.read())),)
return IO.NodeOutput(VideoFromFile(BytesIO(await vid_response.content.read())))
@classmethod
async def _convert_to_keyframes(
self,
cls,
first_image: torch.Tensor = None,
last_image: torch.Tensor = None,
auth_kwargs: Optional[dict[str,str]] = None,
@ -719,23 +742,18 @@ class LumaImageToVideoGenerationNode(ComfyNodeABC):
return LumaKeyframes(frame0=frame0, frame1=frame1)
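A pattern repeated across the rewritten Luma nodes above: the old api_call instance methods received the hidden auth inputs through **kwargs and a unique_id argument, while the new classmethod execute reads them from cls.hidden and hands an explicit auth_kwargs dict to every helper. A stripped-down sketch of that shape, with a hypothetical node name and the API calls elided:

class ExampleLumaStyleNode(IO.ComfyNode):  # hypothetical, shown only to isolate the auth pattern
    @classmethod
    async def execute(cls, prompt: str) -> IO.NodeOutput:
        # hidden inputs are read from cls.hidden instead of arriving via **kwargs
        auth_kwargs = {
            "auth_token": cls.hidden.auth_token_comfy_org,
            "comfy_api_key": cls.hidden.api_key_comfy_org,
        }
        # helpers such as upload_images_to_comfyapi and SynchronousOperation keep their
        # auth_kwargs parameter; only the source of the values changes. Progress and
        # polling reporting now use cls.hidden.unique_id rather than a unique_id argument.
        return IO.NodeOutput(None)  # real nodes return their generated media here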
# A dictionary that contains all nodes you want to export with their names
# NOTE: names should be globally unique
NODE_CLASS_MAPPINGS = {
"LumaImageNode": LumaImageGenerationNode,
"LumaImageModifyNode": LumaImageModifyNode,
"LumaVideoNode": LumaTextToVideoGenerationNode,
"LumaImageToVideoNode": LumaImageToVideoGenerationNode,
"LumaReferenceNode": LumaReferenceNode,
"LumaConceptsNode": LumaConceptsNode,
}
class LumaExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
return [
LumaImageGenerationNode,
LumaImageModifyNode,
LumaTextToVideoGenerationNode,
LumaImageToVideoGenerationNode,
LumaReferenceNode,
LumaConceptsNode,
]
# A dictionary that contains the friendly/humanly readable titles for the nodes
NODE_DISPLAY_NAME_MAPPINGS = {
"LumaImageNode": "Luma Text to Image",
"LumaImageModifyNode": "Luma Image to Image",
"LumaVideoNode": "Luma Text to Video",
"LumaImageToVideoNode": "Luma Image to Video",
"LumaReferenceNode": "Luma Reference",
"LumaConceptsNode": "Luma Concepts",
}
async def comfy_entrypoint() -> LumaExtension:
return LumaExtension()
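With the extension class above, the module-level NODE_CLASS_MAPPINGS and NODE_DISPLAY_NAME_MAPPINGS dictionaries that previously registered these nodes are gone: registration goes through get_node_list, and the human-readable titles (for example "Luma Text to Image") move into each node's Schema display_name. A sketch of the registration shape, with a hypothetical extension name; how the loader discovers comfy_entrypoint is not shown in this diff:

class ExampleExtension(ComfyExtension):  # hypothetical; LumaExtension above is the real one
    @override
    async def get_node_list(self) -> list[type[IO.ComfyNode]]:
        # display names are taken from each class's Schema, so no separate mapping is needed
        return [LumaImageGenerationNode, LumaImageModifyNode]

async def comfy_entrypoint() -> ExampleExtension:
    # presumably awaited by the node loader in place of reading NODE_CLASS_MAPPINGS
    return ExampleExtension()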
View File
@ -4,7 +4,7 @@ import logging
import torch
from typing_extensions import override
from comfy_api.latest import ComfyExtension, io as comfy_io
from comfy_api.latest import ComfyExtension, IO
from comfy_api.input_impl.video_types import VideoFromFile
from comfy_api_nodes.apis import (
MinimaxVideoGenerationRequest,
@ -43,7 +43,7 @@ async def _generate_mm_video(
image: Optional[torch.Tensor] = None, # used for ImageToVideo
subject: Optional[torch.Tensor] = None, # used for SubjectToVideo
average_duration: Optional[int] = None,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
if image is None:
validate_string(prompt_text, field_name="prompt_text")
# upload image, if passed in
@ -133,35 +133,35 @@ async def _generate_mm_video(
error_msg = f"Failed to download video from {file_url}"
logging.error(error_msg)
raise Exception(error_msg)
return comfy_io.NodeOutput(VideoFromFile(video_io))
return IO.NodeOutput(VideoFromFile(video_io))
class MinimaxTextToVideoNode(comfy_io.ComfyNode):
class MinimaxTextToVideoNode(IO.ComfyNode):
"""
Generates videos synchronously based on a prompt, and optional parameters using MiniMax's API.
"""
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="MinimaxTextToVideoNode",
display_name="MiniMax Text to Video",
category="api node/video/MiniMax",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.String.Input(
IO.String.Input(
"prompt_text",
multiline=True,
default="",
tooltip="Text prompt to guide the video generation",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=["T2V-01", "T2V-01-Director"],
default="T2V-01",
tooltip="Model to use for video generation",
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
@ -172,11 +172,11 @@ class MinimaxTextToVideoNode(comfy_io.ComfyNode):
optional=True,
),
],
outputs=[comfy_io.Video.Output()],
outputs=[IO.Video.Output()],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -187,7 +187,7 @@ class MinimaxTextToVideoNode(comfy_io.ComfyNode):
prompt_text: str,
model: str = "T2V-01",
seed: int = 0,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
return await _generate_mm_video(
auth={
"auth_token": cls.hidden.auth_token_comfy_org,
@ -203,36 +203,36 @@ class MinimaxTextToVideoNode(comfy_io.ComfyNode):
)
class MinimaxImageToVideoNode(comfy_io.ComfyNode):
class MinimaxImageToVideoNode(IO.ComfyNode):
"""
Generates videos synchronously based on an image and prompt, and optional parameters using MiniMax's API.
"""
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="MinimaxImageToVideoNode",
display_name="MiniMax Image to Video",
category="api node/video/MiniMax",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.Image.Input(
IO.Image.Input(
"image",
tooltip="Image to use as first frame of video generation",
),
comfy_io.String.Input(
IO.String.Input(
"prompt_text",
multiline=True,
default="",
tooltip="Text prompt to guide the video generation",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=["I2V-01-Director", "I2V-01", "I2V-01-live"],
default="I2V-01",
tooltip="Model to use for video generation",
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
@ -243,11 +243,11 @@ class MinimaxImageToVideoNode(comfy_io.ComfyNode):
optional=True,
),
],
outputs=[comfy_io.Video.Output()],
outputs=[IO.Video.Output()],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -259,7 +259,7 @@ class MinimaxImageToVideoNode(comfy_io.ComfyNode):
prompt_text: str,
model: str = "I2V-01",
seed: int = 0,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
return await _generate_mm_video(
auth={
"auth_token": cls.hidden.auth_token_comfy_org,
@ -275,36 +275,36 @@ class MinimaxImageToVideoNode(comfy_io.ComfyNode):
)
class MinimaxSubjectToVideoNode(comfy_io.ComfyNode):
class MinimaxSubjectToVideoNode(IO.ComfyNode):
"""
Generates videos synchronously based on an image and prompt, and optional parameters using MiniMax's API.
"""
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="MinimaxSubjectToVideoNode",
display_name="MiniMax Subject to Video",
category="api node/video/MiniMax",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.Image.Input(
IO.Image.Input(
"subject",
tooltip="Image of subject to reference for video generation",
),
comfy_io.String.Input(
IO.String.Input(
"prompt_text",
multiline=True,
default="",
tooltip="Text prompt to guide the video generation",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=["S2V-01"],
default="S2V-01",
tooltip="Model to use for video generation",
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
@ -315,11 +315,11 @@ class MinimaxSubjectToVideoNode(comfy_io.ComfyNode):
optional=True,
),
],
outputs=[comfy_io.Video.Output()],
outputs=[IO.Video.Output()],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -331,7 +331,7 @@ class MinimaxSubjectToVideoNode(comfy_io.ComfyNode):
prompt_text: str,
model: str = "S2V-01",
seed: int = 0,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
return await _generate_mm_video(
auth={
"auth_token": cls.hidden.auth_token_comfy_org,
@ -347,24 +347,24 @@ class MinimaxSubjectToVideoNode(comfy_io.ComfyNode):
)
class MinimaxHailuoVideoNode(comfy_io.ComfyNode):
class MinimaxHailuoVideoNode(IO.ComfyNode):
"""Generates videos from prompt, with optional start frame using the new MiniMax Hailuo-02 model."""
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="MinimaxHailuoVideoNode",
display_name="MiniMax Hailuo Video",
category="api node/video/MiniMax",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.String.Input(
IO.String.Input(
"prompt_text",
multiline=True,
default="",
tooltip="Text prompt to guide the video generation.",
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
@ -374,25 +374,25 @@ class MinimaxHailuoVideoNode(comfy_io.ComfyNode):
tooltip="The random seed used for creating the noise.",
optional=True,
),
comfy_io.Image.Input(
IO.Image.Input(
"first_frame_image",
tooltip="Optional image to use as the first frame to generate a video.",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"prompt_optimizer",
default=True,
tooltip="Optimize prompt to improve generation quality when needed.",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"duration",
options=[6, 10],
default=6,
tooltip="The length of the output video in seconds.",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"resolution",
options=["768P", "1080P"],
default="768P",
@ -400,11 +400,11 @@ class MinimaxHailuoVideoNode(comfy_io.ComfyNode):
optional=True,
),
],
outputs=[comfy_io.Video.Output()],
outputs=[IO.Video.Output()],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -419,7 +419,7 @@ class MinimaxHailuoVideoNode(comfy_io.ComfyNode):
duration: int = 6,
resolution: str = "768P",
model: str = "MiniMax-Hailuo-02",
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
auth = {
"auth_token": cls.hidden.auth_token_comfy_org,
"comfy_api_key": cls.hidden.api_key_comfy_org,
@ -500,7 +500,7 @@ class MinimaxHailuoVideoNode(comfy_io.ComfyNode):
raise Exception(
f"No video was found in the response. Full response: {file_result.model_dump()}"
)
logging.info(f"Generated video URL: {file_url}")
logging.info("Generated video URL: %s", file_url)
if cls.hidden.unique_id:
if hasattr(file_result.file, "backup_download_url"):
message = f"Result URL: {file_url}\nBackup URL: {file_result.file.backup_download_url}"
@ -513,12 +513,12 @@ class MinimaxHailuoVideoNode(comfy_io.ComfyNode):
error_msg = f"Failed to download video from {file_url}"
logging.error(error_msg)
raise Exception(error_msg)
return comfy_io.NodeOutput(VideoFromFile(video_io))
return IO.NodeOutput(VideoFromFile(video_io))
class MinimaxExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[comfy_io.ComfyNode]]:
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
return [
MinimaxTextToVideoNode,
MinimaxImageToVideoNode,
View File
@ -2,11 +2,7 @@ import logging
from typing import Any, Callable, Optional, TypeVar
import torch
from typing_extensions import override
from comfy_api_nodes.util.validation_utils import (
get_image_dimensions,
validate_image_dimensions,
)
from comfy_api_nodes.util.validation_utils import validate_image_dimensions
from comfy_api_nodes.apis import (
MoonvalleyTextToVideoRequest,
@ -26,10 +22,11 @@ from comfy_api_nodes.apinode_utils import (
download_url_to_video_output,
upload_images_to_comfyapi,
upload_video_to_comfyapi,
validate_container_format_is_mp4,
)
from comfy_api.input import VideoInput
from comfy_api.latest import ComfyExtension, InputImpl, io as comfy_io
from comfy_api.latest import ComfyExtension, InputImpl, IO
import av
import io
@ -132,47 +129,6 @@ def validate_prompts(
return True
def validate_input_media(width, height, with_frame_conditioning, num_frames_in=None):
# inference validation
# T = num_frames
# in all cases, the following must be true: T divisible by 16 and H,W by 8. in addition...
# with image conditioning: H*W must be divisible by 8192
# without image conditioning: T divisible by 32
if num_frames_in and not num_frames_in % 16 == 0:
return False, ("The input video total frame count must be divisible by 16!")
if height % 8 != 0 or width % 8 != 0:
return False, (
f"Height ({height}) and width ({width}) must be " "divisible by 8"
)
if with_frame_conditioning:
if (height * width) % 8192 != 0:
return False, (
f"Height * width ({height * width}) must be "
"divisible by 8192 for frame conditioning"
)
else:
if num_frames_in and not num_frames_in % 32 == 0:
return False, ("The input video total frame count must be divisible by 32!")
def validate_input_image(
image: torch.Tensor, with_frame_conditioning: bool = False
) -> None:
"""
Validates the input image adheres to the expectations of the API:
- The image resolution should not be less than 300*300px
- The aspect ratio of the image should be between 1:2.5 ~ 2.5:1
"""
height, width = get_image_dimensions(image)
validate_input_media(width, height, with_frame_conditioning)
validate_image_dimensions(
image, min_width=300, min_height=300, max_height=MAX_HEIGHT, max_width=MAX_WIDTH
)
def validate_video_to_video_input(video: VideoInput) -> VideoInput:
"""
Validates and processes video input for Moonvalley Video-to-Video generation.
@ -189,7 +145,7 @@ def validate_video_to_video_input(video: VideoInput) -> VideoInput:
"""
width, height = _get_video_dimensions(video)
_validate_video_dimensions(width, height)
_validate_container_format(video)
validate_container_format_is_mp4(video)
return _validate_and_trim_duration(video)
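The container check previously implemented locally (removed just below as _validate_container_format) now comes from comfy_api_nodes.apinode_utils as validate_container_format_is_mp4. Its implementation is not part of this diff; judging from the helper it replaces, it is presumably close to the following sketch:

def validate_container_format_is_mp4(video: VideoInput) -> None:
    # sketch only: mirrors the removed local helper; the shared implementation may differ
    container_format = video.get_container_format()
    if container_format not in ["mp4", "mov,mp4,m4a,3gp,3g2,mj2"]:
        raise ValueError(f"Only MP4 container format is supported. Got: {container_format}")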
@ -222,15 +178,6 @@ def _validate_video_dimensions(width: int, height: int) -> None:
)
def _validate_container_format(video: VideoInput) -> None:
"""Validates video container format is MP4."""
container_format = video.get_container_format()
if container_format not in ["mp4", "mov,mp4,m4a,3gp,3g2,mj2"]:
raise ValueError(
f"Only MP4 container format supported. Got: {container_format}"
)
def _validate_and_trim_duration(video: VideoInput) -> VideoInput:
"""Validates video duration and trims to 5 seconds if needed."""
duration = video.get_duration()
@ -282,7 +229,7 @@ def trim_video(video: VideoInput, duration_sec: float) -> VideoInput:
audio_stream = None
for stream in input_container.streams:
logging.info(f"Found stream: type={stream.type}, class={type(stream)}")
logging.info("Found stream: type=%s, class=%s", stream.type, type(stream))
if isinstance(stream, av.VideoStream):
# Create output video stream with same parameters
video_stream = output_container.add_stream(
@ -292,7 +239,7 @@ def trim_video(video: VideoInput, duration_sec: float) -> VideoInput:
video_stream.height = stream.height
video_stream.pix_fmt = "yuv420p"
logging.info(
f"Added video stream: {stream.width}x{stream.height} @ {stream.average_rate}fps"
"Added video stream: %sx%s @ %sfps", stream.width, stream.height, stream.average_rate
)
elif isinstance(stream, av.AudioStream):
# Create output audio stream with same parameters
@ -301,9 +248,7 @@ def trim_video(video: VideoInput, duration_sec: float) -> VideoInput:
)
audio_stream.sample_rate = stream.sample_rate
audio_stream.layout = stream.layout
logging.info(
f"Added audio stream: {stream.sample_rate}Hz, {stream.channels} channels"
)
logging.info("Added audio stream: %sHz, %s channels", stream.sample_rate, stream.channels)
# Calculate target frame count that's divisible by 16
fps = input_container.streams.video[0].average_rate
@ -333,9 +278,7 @@ def trim_video(video: VideoInput, duration_sec: float) -> VideoInput:
for packet in video_stream.encode():
output_container.mux(packet)
logging.info(
f"Encoded {frame_count} video frames (target: {target_frames})"
)
logging.info("Encoded %s video frames (target: %s)", frame_count, target_frames)
# Decode and re-encode audio frames
if audio_stream:
@ -353,7 +296,7 @@ def trim_video(video: VideoInput, duration_sec: float) -> VideoInput:
for packet in audio_stream.encode():
output_container.mux(packet)
logging.info(f"Encoded {audio_frame_count} audio frames")
logging.info("Encoded %s audio frames", audio_frame_count)
# Close containers
output_container.close()
@ -380,7 +323,7 @@ def parse_width_height_from_res(resolution: str):
"1:1 (1152 x 1152)": {"width": 1152, "height": 1152},
"4:3 (1536 x 1152)": {"width": 1536, "height": 1152},
"3:4 (1152 x 1536)": {"width": 1152, "height": 1536},
"21:9 (2560 x 1080)": {"width": 2560, "height": 1080},
# "21:9 (2560 x 1080)": {"width": 2560, "height": 1080},
}
return res_map.get(resolution, {"width": 1920, "height": 1080})
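The logging changes in trim_video above replace f-strings with %-style arguments so that interpolation is deferred to the logging framework and skipped when the level is disabled; it also satisfies lint rules that flag f-strings in logging calls, though the diff does not state the motivation. For example:

import logging

frame_count, target_frames = 120, 128

# eager: the f-string is rendered even if INFO records are being discarded
logging.info(f"Encoded {frame_count} video frames (target: {target_frames})")

# lazy: the arguments are only interpolated if the record is actually emitted
logging.info("Encoded %s video frames (target: %s)", frame_count, target_frames)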
@ -411,36 +354,36 @@ async def get_response(
)
class MoonvalleyImg2VideoNode(comfy_io.ComfyNode):
class MoonvalleyImg2VideoNode(IO.ComfyNode):
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="MoonvalleyImg2VideoNode",
display_name="Moonvalley Marey Image to Video",
category="api node/video/Moonvalley Marey",
description="Moonvalley Marey Image to Video Node",
inputs=[
comfy_io.Image.Input(
IO.Image.Input(
"image",
tooltip="The reference image used to generate the video",
),
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
),
comfy_io.String.Input(
IO.String.Input(
"negative_prompt",
multiline=True,
default="<synthetic> <scene cut> gopro, bright, contrast, static, overexposed, vignette, "
"artifacts, still, noise, texture, scanlines, videogame, 360 camera, VR, transition, "
"flare, saturation, distorted, warped, wide angle, saturated, vibrant, glowing, "
"cross dissolve, cheesy, ugly hands, mutated hands, mutant, disfigured, extra fingers, "
"blown out, horrible, blurry, worst quality, bad, dissolve, melt, fade in, fade out, "
"wobbly, weird, low quality, plastic, stock footage, video camera, boring",
"artifacts, still, noise, texture, scanlines, videogame, 360 camera, VR, transition, "
"flare, saturation, distorted, warped, wide angle, saturated, vibrant, glowing, "
"cross dissolve, cheesy, ugly hands, mutated hands, mutant, disfigured, extra fingers, "
"blown out, horrible, blurry, worst quality, bad, dissolve, melt, fade in, fade out, "
"wobbly, weird, low quality, plastic, stock footage, video camera, boring",
tooltip="Negative prompt text",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"resolution",
options=[
"16:9 (1920 x 1080)",
@ -448,42 +391,43 @@ class MoonvalleyImg2VideoNode(comfy_io.ComfyNode):
"1:1 (1152 x 1152)",
"4:3 (1536 x 1152)",
"3:4 (1152 x 1536)",
"21:9 (2560 x 1080)",
# "21:9 (2560 x 1080)",
],
default="16:9 (1920 x 1080)",
tooltip="Resolution of the output video",
),
comfy_io.Float.Input(
IO.Float.Input(
"prompt_adherence",
default=10.0,
default=4.5,
min=1.0,
max=20.0,
step=1.0,
tooltip="Guidance scale for generation control",
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=9,
min=0,
max=4294967295,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
tooltip="Random seed value",
control_after_generate=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"steps",
default=100,
default=33,
min=1,
max=100,
step=1,
tooltip="Number of denoising steps",
),
],
outputs=[comfy_io.Video.Output()],
outputs=[IO.Video.Output()],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -498,8 +442,8 @@ class MoonvalleyImg2VideoNode(comfy_io.ComfyNode):
prompt_adherence: float,
seed: int,
steps: int,
) -> comfy_io.NodeOutput:
validate_input_image(image, True)
) -> IO.NodeOutput:
validate_image_dimensions(image, min_width=300, min_height=300, max_height=MAX_HEIGHT, max_width=MAX_WIDTH)
validate_prompts(prompt, negative_prompt, MOONVALLEY_MAREY_MAX_PROMPT_LENGTH)
width_height = parse_width_height_from_res(resolution)
@ -513,12 +457,11 @@ class MoonvalleyImg2VideoNode(comfy_io.ComfyNode):
steps=steps,
seed=seed,
guidance_scale=prompt_adherence,
num_frames=128,
width=width_height["width"],
height=width_height["height"],
use_negative_prompts=True,
)
"""Upload image to comfy backend to have a URL available for further processing"""
# Get MIME type from tensor - assuming PNG format for image tensors
mime_type = "image/png"
@ -549,57 +492,57 @@ class MoonvalleyImg2VideoNode(comfy_io.ComfyNode):
task_id, auth_kwargs=auth, node_id=cls.hidden.unique_id
)
video = await download_url_to_video_output(final_response.output_url)
return comfy_io.NodeOutput(video)
return IO.NodeOutput(video)
class MoonvalleyVideo2VideoNode(comfy_io.ComfyNode):
class MoonvalleyVideo2VideoNode(IO.ComfyNode):
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="MoonvalleyVideo2VideoNode",
display_name="Moonvalley Marey Video to Video",
category="api node/video/Moonvalley Marey",
description="",
inputs=[
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
tooltip="Describes the video to generate",
),
comfy_io.String.Input(
IO.String.Input(
"negative_prompt",
multiline=True,
default="<synthetic> <scene cut> gopro, bright, contrast, static, overexposed, vignette, "
"artifacts, still, noise, texture, scanlines, videogame, 360 camera, VR, transition, "
"flare, saturation, distorted, warped, wide angle, saturated, vibrant, glowing, "
"cross dissolve, cheesy, ugly hands, mutated hands, mutant, disfigured, extra fingers, "
"blown out, horrible, blurry, worst quality, bad, dissolve, melt, fade in, fade out, "
"wobbly, weird, low quality, plastic, stock footage, video camera, boring",
"artifacts, still, noise, texture, scanlines, videogame, 360 camera, VR, transition, "
"flare, saturation, distorted, warped, wide angle, saturated, vibrant, glowing, "
"cross dissolve, cheesy, ugly hands, mutated hands, mutant, disfigured, extra fingers, "
"blown out, horrible, blurry, worst quality, bad, dissolve, melt, fade in, fade out, "
"wobbly, weird, low quality, plastic, stock footage, video camera, boring",
tooltip="Negative prompt text",
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=9,
min=0,
max=4294967295,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
tooltip="Random seed value",
control_after_generate=False,
),
comfy_io.Video.Input(
IO.Video.Input(
"video",
tooltip="The reference video used to generate the output video. Must be at least 5 seconds long. "
"Videos longer than 5s will be automatically trimmed. Only MP4 format supported.",
"Videos longer than 5s will be automatically trimmed. Only MP4 format supported.",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"control_type",
options=["Motion Transfer", "Pose Transfer"],
default="Motion Transfer",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"motion_intensity",
default=100,
min=0,
@ -608,12 +551,21 @@ class MoonvalleyVideo2VideoNode(comfy_io.ComfyNode):
tooltip="Only used if control_type is 'Motion Transfer'",
optional=True,
),
IO.Int.Input(
"steps",
default=33,
min=1,
max=100,
step=1,
display_mode=IO.NumberDisplay.number,
tooltip="Number of inference steps",
),
],
outputs=[comfy_io.Video.Output()],
outputs=[IO.Video.Output()],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -627,7 +579,9 @@ class MoonvalleyVideo2VideoNode(comfy_io.ComfyNode):
video: Optional[VideoInput] = None,
control_type: str = "Motion Transfer",
motion_intensity: Optional[int] = 100,
) -> comfy_io.NodeOutput:
steps=33,
prompt_adherence=4.5,
) -> IO.NodeOutput:
auth = {
"auth_token": cls.hidden.auth_token_comfy_org,
"comfy_api_key": cls.hidden.api_key_comfy_org,
@ -636,7 +590,6 @@ class MoonvalleyVideo2VideoNode(comfy_io.ComfyNode):
validated_video = validate_video_to_video_input(video)
video_url = await upload_video_to_comfyapi(validated_video, auth_kwargs=auth)
"""Validate prompts and inference input"""
validate_prompts(prompt, negative_prompt)
# Only include motion_intensity for Motion Transfer
@ -648,6 +601,8 @@ class MoonvalleyVideo2VideoNode(comfy_io.ComfyNode):
negative_prompt=negative_prompt,
seed=seed,
control_params=control_params,
steps=steps,
guidance_scale=prompt_adherence,
)
control = parse_control_parameter(control_type)
@ -678,35 +633,35 @@ class MoonvalleyVideo2VideoNode(comfy_io.ComfyNode):
)
video = await download_url_to_video_output(final_response.output_url)
return comfy_io.NodeOutput(video)
return IO.NodeOutput(video)
class MoonvalleyTxt2VideoNode(comfy_io.ComfyNode):
class MoonvalleyTxt2VideoNode(IO.ComfyNode):
@classmethod
def define_schema(cls) -> comfy_io.Schema:
return comfy_io.Schema(
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="MoonvalleyTxt2VideoNode",
display_name="Moonvalley Marey Text to Video",
category="api node/video/Moonvalley Marey",
description="",
inputs=[
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
),
comfy_io.String.Input(
IO.String.Input(
"negative_prompt",
multiline=True,
default="<synthetic> <scene cut> gopro, bright, contrast, static, overexposed, vignette, "
"artifacts, still, noise, texture, scanlines, videogame, 360 camera, VR, transition, "
"flare, saturation, distorted, warped, wide angle, saturated, vibrant, glowing, "
"cross dissolve, cheesy, ugly hands, mutated hands, mutant, disfigured, extra fingers, "
"blown out, horrible, blurry, worst quality, bad, dissolve, melt, fade in, fade out, "
"wobbly, weird, low quality, plastic, stock footage, video camera, boring",
"artifacts, still, noise, texture, scanlines, videogame, 360 camera, VR, transition, "
"flare, saturation, distorted, warped, wide angle, saturated, vibrant, glowing, "
"cross dissolve, cheesy, ugly hands, mutated hands, mutant, disfigured, extra fingers, "
"blown out, horrible, blurry, worst quality, bad, dissolve, melt, fade in, fade out, "
"wobbly, weird, low quality, plastic, stock footage, video camera, boring",
tooltip="Negative prompt text",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"resolution",
options=[
"16:9 (1920 x 1080)",
@ -719,37 +674,38 @@ class MoonvalleyTxt2VideoNode(comfy_io.ComfyNode):
default="16:9 (1920 x 1080)",
tooltip="Resolution of the output video",
),
comfy_io.Float.Input(
IO.Float.Input(
"prompt_adherence",
default=10.0,
default=4.0,
min=1.0,
max=20.0,
step=1.0,
tooltip="Guidance scale for generation control",
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=9,
min=0,
max=4294967295,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="Random seed value",
),
comfy_io.Int.Input(
IO.Int.Input(
"steps",
default=100,
default=33,
min=1,
max=100,
step=1,
tooltip="Inference steps",
),
],
outputs=[comfy_io.Video.Output()],
outputs=[IO.Video.Output()],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -763,7 +719,7 @@ class MoonvalleyTxt2VideoNode(comfy_io.ComfyNode):
prompt_adherence: float,
seed: int,
steps: int,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_prompts(prompt, negative_prompt, MOONVALLEY_MAREY_MAX_PROMPT_LENGTH)
width_height = parse_width_height_from_res(resolution)
@ -804,12 +760,12 @@ class MoonvalleyTxt2VideoNode(comfy_io.ComfyNode):
)
video = await download_url_to_video_output(final_response.output_url)
return comfy_io.NodeOutput(video)
return IO.NodeOutput(video)
class MoonvalleyExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[comfy_io.ComfyNode]]:
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
return [
MoonvalleyImg2VideoNode,
MoonvalleyTxt2VideoNode,

File diff suppressed because it is too large

View File

@ -1,5 +1,7 @@
from inspect import cleandoc
from typing import Optional
from typing_extensions import override
from io import BytesIO
from comfy_api_nodes.apis.pixverse_api import (
PixverseTextVideoRequest,
PixverseImageVideoRequest,
@ -26,12 +28,11 @@ from comfy_api_nodes.apinode_utils import (
tensor_to_bytesio,
validate_string,
)
from comfy.comfy_types.node_typing import IO, ComfyNodeABC
from comfy_api.input_impl import VideoFromFile
from comfy_api.latest import ComfyExtension, IO
import torch
import aiohttp
from io import BytesIO
AVERAGE_DURATION_T2V = 32
@ -72,100 +73,101 @@ async def upload_image_to_pixverse(image: torch.Tensor, auth_kwargs=None):
return response_upload.Resp.img_id
class PixverseTemplateNode:
class PixverseTemplateNode(IO.ComfyNode):
"""
Select template for PixVerse Video generation.
"""
RETURN_TYPES = (PixverseIO.TEMPLATE,)
RETURN_NAMES = ("pixverse_template",)
FUNCTION = "create_template"
CATEGORY = "api node/video/PixVerse"
@classmethod
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="PixverseTemplateNode",
display_name="PixVerse Template",
category="api node/video/PixVerse",
inputs=[
IO.Combo.Input("template", options=list(pixverse_templates.keys())),
],
outputs=[IO.Custom(PixverseIO.TEMPLATE).Output(display_name="pixverse_template")],
)
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"template": (list(pixverse_templates.keys()),),
}
}
def create_template(self, template: str):
def execute(cls, template: str) -> IO.NodeOutput:
template_id = pixverse_templates.get(template, None)
if template_id is None:
raise Exception(f"Template '{template}' is not recognized.")
# just return the integer
return (template_id,)
return IO.NodeOutput(template_id)
class PixverseTextToVideoNode(ComfyNodeABC):
class PixverseTextToVideoNode(IO.ComfyNode):
"""
Generates videos based on prompt and output_size.
"""
RETURN_TYPES = (IO.VIDEO,)
DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value
FUNCTION = "api_call"
API_NODE = True
CATEGORY = "api node/video/PixVerse"
@classmethod
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="PixverseTextToVideoNode",
display_name="PixVerse Text to Video",
category="api node/video/PixVerse",
description=cleandoc(cls.__doc__ or ""),
inputs=[
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Prompt for the video generation",
),
IO.Combo.Input(
"aspect_ratio",
options=PixverseAspectRatio,
),
IO.Combo.Input(
"quality",
options=PixverseQuality,
default=PixverseQuality.res_540p,
),
IO.Combo.Input(
"duration_seconds",
options=PixverseDuration,
),
IO.Combo.Input(
"motion_mode",
options=PixverseMotionMode,
),
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
control_after_generate=True,
tooltip="Seed for video generation.",
),
IO.String.Input(
"negative_prompt",
default="",
multiline=True,
tooltip="An optional text description of undesired elements on an image.",
optional=True,
),
IO.Custom(PixverseIO.TEMPLATE).Input(
"pixverse_template",
tooltip="An optional template to influence style of generation, created by the PixVerse Template node.",
optional=True,
),
],
outputs=[IO.Video.Output()],
hidden=[
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"prompt": (
IO.STRING,
{
"multiline": True,
"default": "",
"tooltip": "Prompt for the video generation",
},
),
"aspect_ratio": ([ratio.value for ratio in PixverseAspectRatio],),
"quality": (
[resolution.value for resolution in PixverseQuality],
{
"default": PixverseQuality.res_540p,
},
),
"duration_seconds": ([dur.value for dur in PixverseDuration],),
"motion_mode": ([mode.value for mode in PixverseMotionMode],),
"seed": (
IO.INT,
{
"default": 0,
"min": 0,
"max": 2147483647,
"control_after_generate": True,
"tooltip": "Seed for video generation.",
},
),
},
"optional": {
"negative_prompt": (
IO.STRING,
{
"default": "",
"forceInput": True,
"tooltip": "An optional text description of undesired elements on an image.",
},
),
"pixverse_template": (
PixverseIO.TEMPLATE,
{
"tooltip": "An optional template to influence style of generation, created by the PixVerse Template node."
},
),
},
"hidden": {
"auth_token": "AUTH_TOKEN_COMFY_ORG",
"comfy_api_key": "API_KEY_COMFY_ORG",
"unique_id": "UNIQUE_ID",
},
}
async def api_call(
self,
async def execute(
cls,
prompt: str,
aspect_ratio: str,
quality: str,
@ -174,9 +176,7 @@ class PixverseTextToVideoNode(ComfyNodeABC):
seed,
negative_prompt: str = None,
pixverse_template: int = None,
unique_id: Optional[str] = None,
**kwargs,
):
) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=False)
# 1080p is limited to 5 seconds duration
# only normal motion_mode supported for 1080p or for non-5 second duration
@ -186,6 +186,10 @@ class PixverseTextToVideoNode(ComfyNodeABC):
elif duration_seconds != PixverseDuration.dur_5:
motion_mode = PixverseMotionMode.normal
auth = {
"auth_token": cls.hidden.auth_token_comfy_org,
"comfy_api_key": cls.hidden.api_key_comfy_org,
}
operation = SynchronousOperation(
endpoint=ApiEndpoint(
path="/proxy/pixverse/video/text/generate",
@ -203,7 +207,7 @@ class PixverseTextToVideoNode(ComfyNodeABC):
template_id=pixverse_template,
seed=seed,
),
auth_kwargs=kwargs,
auth_kwargs=auth,
)
response_api = await operation.execute()
@ -224,8 +228,8 @@ class PixverseTextToVideoNode(ComfyNodeABC):
PixverseStatus.deleted,
],
status_extractor=lambda x: x.Resp.status,
auth_kwargs=kwargs,
node_id=unique_id,
auth_kwargs=auth,
node_id=cls.hidden.unique_id,
result_url_extractor=get_video_url_from_response,
estimated_duration=AVERAGE_DURATION_T2V,
)
@ -233,77 +237,75 @@ class PixverseTextToVideoNode(ComfyNodeABC):
async with aiohttp.ClientSession() as session:
async with session.get(response_poll.Resp.url) as vid_response:
return (VideoFromFile(BytesIO(await vid_response.content.read())),)
return IO.NodeOutput(VideoFromFile(BytesIO(await vid_response.content.read())))
class PixverseImageToVideoNode(ComfyNodeABC):
class PixverseImageToVideoNode(IO.ComfyNode):
"""
Generates videos from an input image and prompt.
"""
RETURN_TYPES = (IO.VIDEO,)
DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value
FUNCTION = "api_call"
API_NODE = True
CATEGORY = "api node/video/PixVerse"
@classmethod
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="PixverseImageToVideoNode",
display_name="PixVerse Image to Video",
category="api node/video/PixVerse",
description=cleandoc(cls.__doc__ or ""),
inputs=[
IO.Image.Input("image"),
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Prompt for the video generation",
),
IO.Combo.Input(
"quality",
options=PixverseQuality,
default=PixverseQuality.res_540p,
),
IO.Combo.Input(
"duration_seconds",
options=PixverseDuration,
),
IO.Combo.Input(
"motion_mode",
options=PixverseMotionMode,
),
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
control_after_generate=True,
tooltip="Seed for video generation.",
),
IO.String.Input(
"negative_prompt",
default="",
multiline=True,
tooltip="An optional text description of undesired elements on an image.",
optional=True,
),
IO.Custom(PixverseIO.TEMPLATE).Input(
"pixverse_template",
tooltip="An optional template to influence style of generation, created by the PixVerse Template node.",
optional=True,
),
],
outputs=[IO.Video.Output()],
hidden=[
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"image": (IO.IMAGE,),
"prompt": (
IO.STRING,
{
"multiline": True,
"default": "",
"tooltip": "Prompt for the video generation",
},
),
"quality": (
[resolution.value for resolution in PixverseQuality],
{
"default": PixverseQuality.res_540p,
},
),
"duration_seconds": ([dur.value for dur in PixverseDuration],),
"motion_mode": ([mode.value for mode in PixverseMotionMode],),
"seed": (
IO.INT,
{
"default": 0,
"min": 0,
"max": 2147483647,
"control_after_generate": True,
"tooltip": "Seed for video generation.",
},
),
},
"optional": {
"negative_prompt": (
IO.STRING,
{
"default": "",
"forceInput": True,
"tooltip": "An optional text description of undesired elements on an image.",
},
),
"pixverse_template": (
PixverseIO.TEMPLATE,
{
"tooltip": "An optional template to influence style of generation, created by the PixVerse Template node."
},
),
},
"hidden": {
"auth_token": "AUTH_TOKEN_COMFY_ORG",
"comfy_api_key": "API_KEY_COMFY_ORG",
"unique_id": "UNIQUE_ID",
},
}
async def api_call(
self,
async def execute(
cls,
image: torch.Tensor,
prompt: str,
quality: str,
@ -312,11 +314,13 @@ class PixverseImageToVideoNode(ComfyNodeABC):
seed,
negative_prompt: str = None,
pixverse_template: int = None,
unique_id: Optional[str] = None,
**kwargs,
):
) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=False)
img_id = await upload_image_to_pixverse(image, auth_kwargs=kwargs)
auth = {
"auth_token": cls.hidden.auth_token_comfy_org,
"comfy_api_key": cls.hidden.api_key_comfy_org,
}
img_id = await upload_image_to_pixverse(image, auth_kwargs=auth)
# 1080p is limited to 5 seconds duration
# only normal motion_mode supported for 1080p or for non-5 second duration
@ -343,7 +347,7 @@ class PixverseImageToVideoNode(ComfyNodeABC):
template_id=pixverse_template,
seed=seed,
),
auth_kwargs=kwargs,
auth_kwargs=auth,
)
response_api = await operation.execute()
@ -364,8 +368,8 @@ class PixverseImageToVideoNode(ComfyNodeABC):
PixverseStatus.deleted,
],
status_extractor=lambda x: x.Resp.status,
auth_kwargs=kwargs,
node_id=unique_id,
auth_kwargs=auth,
node_id=cls.hidden.unique_id,
result_url_extractor=get_video_url_from_response,
estimated_duration=AVERAGE_DURATION_I2V,
)
@ -373,72 +377,71 @@ class PixverseImageToVideoNode(ComfyNodeABC):
async with aiohttp.ClientSession() as session:
async with session.get(response_poll.Resp.url) as vid_response:
return (VideoFromFile(BytesIO(await vid_response.content.read())),)
return IO.NodeOutput(VideoFromFile(BytesIO(await vid_response.content.read())))
class PixverseTransitionVideoNode(ComfyNodeABC):
class PixverseTransitionVideoNode(IO.ComfyNode):
"""
Generates transition videos between a first and last frame, guided by a prompt.
"""
RETURN_TYPES = (IO.VIDEO,)
DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value
FUNCTION = "api_call"
API_NODE = True
CATEGORY = "api node/video/PixVerse"
@classmethod
def define_schema(cls) -> IO.Schema:
return IO.Schema(
node_id="PixverseTransitionVideoNode",
display_name="PixVerse Transition Video",
category="api node/video/PixVerse",
description=cleandoc(cls.__doc__ or ""),
inputs=[
IO.Image.Input("first_frame"),
IO.Image.Input("last_frame"),
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Prompt for the video generation",
),
IO.Combo.Input(
"quality",
options=PixverseQuality,
default=PixverseQuality.res_540p,
),
IO.Combo.Input(
"duration_seconds",
options=PixverseDuration,
),
IO.Combo.Input(
"motion_mode",
options=PixverseMotionMode,
),
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
control_after_generate=True,
tooltip="Seed for video generation.",
),
IO.String.Input(
"negative_prompt",
default="",
multiline=True,
tooltip="An optional text description of undesired elements on an image.",
optional=True,
),
],
outputs=[IO.Video.Output()],
hidden=[
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"first_frame": (IO.IMAGE,),
"last_frame": (IO.IMAGE,),
"prompt": (
IO.STRING,
{
"multiline": True,
"default": "",
"tooltip": "Prompt for the video generation",
},
),
"quality": (
[resolution.value for resolution in PixverseQuality],
{
"default": PixverseQuality.res_540p,
},
),
"duration_seconds": ([dur.value for dur in PixverseDuration],),
"motion_mode": ([mode.value for mode in PixverseMotionMode],),
"seed": (
IO.INT,
{
"default": 0,
"min": 0,
"max": 2147483647,
"control_after_generate": True,
"tooltip": "Seed for video generation.",
},
),
},
"optional": {
"negative_prompt": (
IO.STRING,
{
"default": "",
"forceInput": True,
"tooltip": "An optional text description of undesired elements on an image.",
},
),
},
"hidden": {
"auth_token": "AUTH_TOKEN_COMFY_ORG",
"comfy_api_key": "API_KEY_COMFY_ORG",
"unique_id": "UNIQUE_ID",
},
}
async def api_call(
self,
async def execute(
cls,
first_frame: torch.Tensor,
last_frame: torch.Tensor,
prompt: str,
@ -447,12 +450,14 @@ class PixverseTransitionVideoNode(ComfyNodeABC):
motion_mode: str,
seed,
negative_prompt: str = None,
unique_id: Optional[str] = None,
**kwargs,
):
) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=False)
first_frame_id = await upload_image_to_pixverse(first_frame, auth_kwargs=kwargs)
last_frame_id = await upload_image_to_pixverse(last_frame, auth_kwargs=kwargs)
auth = {
"auth_token": cls.hidden.auth_token_comfy_org,
"comfy_api_key": cls.hidden.api_key_comfy_org,
}
first_frame_id = await upload_image_to_pixverse(first_frame, auth_kwargs=auth)
last_frame_id = await upload_image_to_pixverse(last_frame, auth_kwargs=auth)
# 1080p is limited to 5 seconds duration
# only normal motion_mode supported for 1080p or for non-5 second duration
@ -479,7 +484,7 @@ class PixverseTransitionVideoNode(ComfyNodeABC):
negative_prompt=negative_prompt if negative_prompt else None,
seed=seed,
),
auth_kwargs=kwargs,
auth_kwargs=auth,
)
response_api = await operation.execute()
@ -500,8 +505,8 @@ class PixverseTransitionVideoNode(ComfyNodeABC):
PixverseStatus.deleted,
],
status_extractor=lambda x: x.Resp.status,
auth_kwargs=kwargs,
node_id=unique_id,
auth_kwargs=auth,
node_id=cls.hidden.unique_id,
result_url_extractor=get_video_url_from_response,
estimated_duration=AVERAGE_DURATION_T2V,
)
@ -509,19 +514,19 @@ class PixverseTransitionVideoNode(ComfyNodeABC):
async with aiohttp.ClientSession() as session:
async with session.get(response_poll.Resp.url) as vid_response:
return (VideoFromFile(BytesIO(await vid_response.content.read())),)
return IO.NodeOutput(VideoFromFile(BytesIO(await vid_response.content.read())))
NODE_CLASS_MAPPINGS = {
"PixverseTextToVideoNode": PixverseTextToVideoNode,
"PixverseImageToVideoNode": PixverseImageToVideoNode,
"PixverseTransitionVideoNode": PixverseTransitionVideoNode,
"PixverseTemplateNode": PixverseTemplateNode,
}
class PixVerseExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
return [
PixverseTextToVideoNode,
PixverseImageToVideoNode,
PixverseTransitionVideoNode,
PixverseTemplateNode,
]
NODE_DISPLAY_NAME_MAPPINGS = {
"PixverseTextToVideoNode": "PixVerse Text to Video",
"PixverseImageToVideoNode": "PixVerse Image to Video",
"PixverseTransitionVideoNode": "PixVerse Transition Video",
"PixverseTemplateNode": "PixVerse Template",
}
async def comfy_entrypoint() -> PixVerseExtension:
return PixVerseExtension()

View File

@ -35,57 +35,64 @@ from comfy.cmd.server import PromptServer
import torch
from io import BytesIO
from PIL import UnidentifiedImageError
import aiohttp
async def handle_recraft_file_request(
image: torch.Tensor,
path: str,
mask: torch.Tensor=None,
total_pixels=4096*4096,
timeout=1024,
request=None,
auth_kwargs: dict[str,str] = None,
) -> list[BytesIO]:
"""
Handle sending common Recraft file-only request to get back file bytes.
"""
if request is None:
request = EmptyRequest()
files = {
'image': tensor_to_bytesio(image, total_pixels=total_pixels).read()
}
if mask is not None:
files['mask'] = tensor_to_bytesio(mask, total_pixels=total_pixels).read()
operation = SynchronousOperation(
endpoint=ApiEndpoint(
path=path,
method=HttpMethod.POST,
request_model=type(request),
response_model=RecraftImageGenerationResponse,
),
request=request,
files=files,
content_type="multipart/form-data",
auth_kwargs=auth_kwargs,
multipart_parser=recraft_multipart_parser,
)
response: RecraftImageGenerationResponse = await operation.execute()
all_bytesio = []
if response.image is not None:
all_bytesio.append(await download_url_to_bytesio(response.image.url, timeout=timeout))
else:
for data in response.data:
all_bytesio.append(await download_url_to_bytesio(data.url, timeout=timeout))
return all_bytesio
def recraft_multipart_parser(data, parent_key=None, formatter: callable=None, converted_to_check: list[list]=None, is_list=False) -> dict:
image: torch.Tensor,
path: str,
mask: torch.Tensor=None,
total_pixels=4096*4096,
timeout=1024,
request=None,
auth_kwargs: dict[str,str] = None,
) -> list[BytesIO]:
"""
Formats data such that multipart/form-data will work with requests library
when both files and data are present.
Handle sending common Recraft file-only request to get back file bytes.
"""
if request is None:
request = EmptyRequest()
files = {
'image': tensor_to_bytesio(image, total_pixels=total_pixels).read()
}
if mask is not None:
files['mask'] = tensor_to_bytesio(mask, total_pixels=total_pixels).read()
operation = SynchronousOperation(
endpoint=ApiEndpoint(
path=path,
method=HttpMethod.POST,
request_model=type(request),
response_model=RecraftImageGenerationResponse,
),
request=request,
files=files,
content_type="multipart/form-data",
auth_kwargs=auth_kwargs,
multipart_parser=recraft_multipart_parser,
)
response: RecraftImageGenerationResponse = await operation.execute()
all_bytesio = []
if response.image is not None:
all_bytesio.append(await download_url_to_bytesio(response.image.url, timeout=timeout))
else:
for data in response.data:
all_bytesio.append(await download_url_to_bytesio(data.url, timeout=timeout))
return all_bytesio
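A hedged usage sketch of handle_recraft_file_request above, for orientation only; the node classes later in this file are the real callers, and the endpoint path here is an assumption rather than a documented route:

    from io import BytesIO
    from PIL import Image

    async def remove_background_sketch(image, auth_kwargs):
        # Send one image to a Recraft file endpoint and decode the first returned file.
        results: list[BytesIO] = await handle_recraft_file_request(
            image=image,
            path="/proxy/recraft/images/removeBackground",  # assumed path
            auth_kwargs=auth_kwargs,
        )
        return Image.open(results[0])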
def recraft_multipart_parser(
data,
parent_key=None,
formatter: callable = None,
converted_to_check: list[list] = None,
is_list: bool = False,
return_mode: str = "formdata" # "dict" | "formdata"
) -> dict | aiohttp.FormData:
"""
Formats data such that multipart/form-data will work with aiohttp library when both files and data are present.
The OpenAI client that Recraft uses has a bizarre way of serializing lists:
@ -103,23 +110,23 @@ def recraft_multipart_parser(data, parent_key=None, formatter: callable=None, co
# Modification of a function that handled a different type of multipart parsing, big ups:
# https://gist.github.com/kazqvaizer/4cebebe5db654a414132809f9f88067b
def handle_converted_lists(data, parent_key, lists_to_check=tuple[list]):
def handle_converted_lists(item, parent_key, lists_to_check=tuple[list]):
# if the list already exists, just extend it with data
for check_list in lists_to_check:
for conv_tuple in check_list:
if conv_tuple[0] == parent_key and type(conv_tuple[1]) is list:
conv_tuple[1].append(formatter(data))
if conv_tuple[0] == parent_key and isinstance(conv_tuple[1], list):
conv_tuple[1].append(formatter(item))
return True
return False
if converted_to_check is None:
converted_to_check = []
effective_mode = return_mode if parent_key is None else "dict"
if formatter is None:
formatter = lambda v: v # Multipart representation of value
if type(data) is not dict:
if not isinstance(data, dict):
# if the list already exists, just extend it with data
added = handle_converted_lists(data, parent_key, converted_to_check)
if added:
@ -136,15 +143,24 @@ def recraft_multipart_parser(data, parent_key=None, formatter: callable=None, co
for key, value in data.items():
current_key = key if parent_key is None else f"{parent_key}[{key}]"
if type(value) is dict:
if isinstance(value, dict):
converted.extend(recraft_multipart_parser(value, current_key, formatter, next_check).items())
elif type(value) is list:
elif isinstance(value, list):
for ind, list_value in enumerate(value):
iter_key = f"{current_key}[]"
converted.extend(recraft_multipart_parser(list_value, iter_key, formatter, next_check, is_list=True).items())
else:
converted.append((current_key, formatter(value)))
if effective_mode == "formdata":
fd = aiohttp.FormData()
for k, v in dict(converted).items():
if isinstance(v, list):
for item in v:
fd.add_field(k, str(item))
else:
fd.add_field(k, str(v))
return fd
return dict(converted)
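To make the flattening concrete, a small example of what the parser above is expected to produce, based on the visible logic and the upstream docstring (the exact handling of scalar leaves sits in the elided hunk, so treat this as a sketch):

    payload = {"style": "realistic_image", "controls": {"colors": [{"rgb": [0, 255, 0]}]}}

    flat = recraft_multipart_parser(payload, return_mode="dict")
    # flat == {"style": "realistic_image", "controls[colors][][rgb][]": [0, 255, 0]}

    form = recraft_multipart_parser(payload)  # default return_mode="formdata"
    # -> aiohttp.FormData carrying the same fields, ready to use as a request body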

File diff suppressed because it is too large

View File

@ -48,7 +48,7 @@ from comfy_api_nodes.apinode_utils import (
download_url_to_image_tensor,
)
from comfy_api.input_impl import VideoFromFile
from comfy_api.latest import ComfyExtension, io as comfy_io
from comfy_api.latest import ComfyExtension, IO
from comfy_api_nodes.util.validation_utils import validate_image_dimensions, validate_image_aspect_ratio
PATH_IMAGE_TO_VIDEO = "/proxy/runway/image_to_video"
@ -175,11 +175,11 @@ async def generate_video(
return await download_url_to_video_output(video_url)
class RunwayImageToVideoNodeGen3a(comfy_io.ComfyNode):
class RunwayImageToVideoNodeGen3a(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="RunwayImageToVideoNodeGen3a",
display_name="Runway Image to Video (Gen3a Turbo)",
category="api node/video/Runway",
@ -188,42 +188,42 @@ class RunwayImageToVideoNodeGen3a(comfy_io.ComfyNode):
"your input selections will set your generation up for success: "
"https://help.runwayml.com/hc/en-us/articles/33927968552339-Creating-with-Act-One-on-Gen-3-Alpha-and-Turbo.",
inputs=[
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Text prompt for the generation",
),
comfy_io.Image.Input(
IO.Image.Input(
"start_frame",
tooltip="Start frame to be used for the video",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"duration",
options=[model.value for model in Duration],
options=Duration,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"ratio",
options=[model.value for model in RunwayGen3aAspectRatio],
options=RunwayGen3aAspectRatio,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=4294967295,
step=1,
control_after_generate=True,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
tooltip="Random seed for generation",
),
],
outputs=[
comfy_io.Video.Output(),
IO.Video.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
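A note on the options=Duration change above and in the other schemas in this diff: the Combo input is now handed the Enum class itself instead of a precomputed [model.value for model in ...] list. A hypothetical sketch of the normalization such an input might perform internally (not the actual comfy_api implementation):

    from enum import Enum

    def normalize_combo_options_sketch(options):
        # Accept either an Enum subclass or an iterable of plain values and
        # return the list of values a Combo widget would display.
        if isinstance(options, type) and issubclass(options, Enum):
            return [member.value for member in options]
        return list(options)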
@ -236,7 +236,7 @@ class RunwayImageToVideoNodeGen3a(comfy_io.ComfyNode):
duration: str,
ratio: str,
seed: int,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, min_length=1)
validate_image_dimensions(start_frame, max_width=7999, max_height=7999)
validate_image_aspect_ratio(start_frame, min_aspect_ratio=0.5, max_aspect_ratio=2.0)
@ -253,7 +253,7 @@ class RunwayImageToVideoNodeGen3a(comfy_io.ComfyNode):
auth_kwargs=auth_kwargs,
)
return comfy_io.NodeOutput(
return IO.NodeOutput(
await generate_video(
RunwayImageToVideoRequest(
promptText=prompt,
@ -275,11 +275,11 @@ class RunwayImageToVideoNodeGen3a(comfy_io.ComfyNode):
)
class RunwayImageToVideoNodeGen4(comfy_io.ComfyNode):
class RunwayImageToVideoNodeGen4(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="RunwayImageToVideoNodeGen4",
display_name="Runway Image to Video (Gen4 Turbo)",
category="api node/video/Runway",
@ -288,42 +288,42 @@ class RunwayImageToVideoNodeGen4(comfy_io.ComfyNode):
"your input selections will set your generation up for success: "
"https://help.runwayml.com/hc/en-us/articles/37327109429011-Creating-with-Gen-4-Video.",
inputs=[
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Text prompt for the generation",
),
comfy_io.Image.Input(
IO.Image.Input(
"start_frame",
tooltip="Start frame to be used for the video",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"duration",
options=[model.value for model in Duration],
options=Duration,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"ratio",
options=[model.value for model in RunwayGen4TurboAspectRatio],
options=RunwayGen4TurboAspectRatio,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=4294967295,
step=1,
control_after_generate=True,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
tooltip="Random seed for generation",
),
],
outputs=[
comfy_io.Video.Output(),
IO.Video.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -336,7 +336,7 @@ class RunwayImageToVideoNodeGen4(comfy_io.ComfyNode):
duration: str,
ratio: str,
seed: int,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, min_length=1)
validate_image_dimensions(start_frame, max_width=7999, max_height=7999)
validate_image_aspect_ratio(start_frame, min_aspect_ratio=0.5, max_aspect_ratio=2.0)
@ -353,7 +353,7 @@ class RunwayImageToVideoNodeGen4(comfy_io.ComfyNode):
auth_kwargs=auth_kwargs,
)
return comfy_io.NodeOutput(
return IO.NodeOutput(
await generate_video(
RunwayImageToVideoRequest(
promptText=prompt,
@ -376,11 +376,11 @@ class RunwayImageToVideoNodeGen4(comfy_io.ComfyNode):
)
class RunwayFirstLastFrameNode(comfy_io.ComfyNode):
class RunwayFirstLastFrameNode(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="RunwayFirstLastFrameNode",
display_name="Runway First-Last-Frame to Video",
category="api node/video/Runway",
@ -392,46 +392,46 @@ class RunwayFirstLastFrameNode(comfy_io.ComfyNode):
"will set your generation up for success: "
"https://help.runwayml.com/hc/en-us/articles/34170748696595-Creating-with-Keyframes-on-Gen-3.",
inputs=[
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Text prompt for the generation",
),
comfy_io.Image.Input(
IO.Image.Input(
"start_frame",
tooltip="Start frame to be used for the video",
),
comfy_io.Image.Input(
IO.Image.Input(
"end_frame",
tooltip="End frame to be used for the video. Supported for gen3a_turbo only.",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"duration",
options=[model.value for model in Duration],
options=Duration,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"ratio",
options=[model.value for model in RunwayGen3aAspectRatio],
options=RunwayGen3aAspectRatio,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=4294967295,
step=1,
control_after_generate=True,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
tooltip="Random seed for generation",
),
],
outputs=[
comfy_io.Video.Output(),
IO.Video.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -445,7 +445,7 @@ class RunwayFirstLastFrameNode(comfy_io.ComfyNode):
duration: str,
ratio: str,
seed: int,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, min_length=1)
validate_image_dimensions(start_frame, max_width=7999, max_height=7999)
validate_image_dimensions(end_frame, max_width=7999, max_height=7999)
@ -467,7 +467,7 @@ class RunwayFirstLastFrameNode(comfy_io.ComfyNode):
if len(download_urls) != 2:
raise RunwayApiError("Failed to upload one or more images to comfy api.")
return comfy_io.NodeOutput(
return IO.NodeOutput(
await generate_video(
RunwayImageToVideoRequest(
promptText=prompt,
@ -493,40 +493,40 @@ class RunwayFirstLastFrameNode(comfy_io.ComfyNode):
)
class RunwayTextToImageNode(comfy_io.ComfyNode):
class RunwayTextToImageNode(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="RunwayTextToImageNode",
display_name="Runway Text to Image",
category="api node/image/Runway",
description="Generate an image from a text prompt using Runway's Gen 4 model. "
"You can also include reference image to guide the generation.",
inputs=[
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Text prompt for the generation",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"ratio",
options=[model.value for model in RunwayTextToImageAspectRatioEnum],
),
comfy_io.Image.Input(
IO.Image.Input(
"reference_image",
tooltip="Optional reference image to guide the generation",
optional=True,
),
],
outputs=[
comfy_io.Image.Output(),
IO.Image.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -537,7 +537,7 @@ class RunwayTextToImageNode(comfy_io.ComfyNode):
prompt: str,
ratio: str,
reference_image: Optional[torch.Tensor] = None,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, min_length=1)
auth_kwargs = {
@ -588,12 +588,12 @@ class RunwayTextToImageNode(comfy_io.ComfyNode):
if not final_response.output:
raise RunwayApiError("Runway task succeeded but no image data found in response.")
return comfy_io.NodeOutput(await download_url_to_image_tensor(get_image_url_from_task_status(final_response)))
return IO.NodeOutput(await download_url_to_image_tensor(get_image_url_from_task_status(final_response)))
class RunwayExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[comfy_io.ComfyNode]]:
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
return [
RunwayFirstLastFrameNode,
RunwayImageToVideoNodeGen3a,

View File

@ -0,0 +1,175 @@
from typing import Optional
from typing_extensions import override
import torch
from pydantic import BaseModel, Field
from comfy_api.latest import ComfyExtension, IO
from comfy_api_nodes.apis.client import (
ApiEndpoint,
HttpMethod,
SynchronousOperation,
PollingOperation,
EmptyRequest,
)
from comfy_api_nodes.util.validation_utils import get_number_of_images
from comfy_api_nodes.apinode_utils import (
download_url_to_video_output,
tensor_to_bytesio,
)
class Sora2GenerationRequest(BaseModel):
prompt: str = Field(...)
model: str = Field(...)
seconds: str = Field(...)
size: str = Field(...)
class Sora2GenerationResponse(BaseModel):
id: str = Field(...)
error: Optional[dict] = Field(None)
status: Optional[str] = Field(None)
class OpenAIVideoSora2(IO.ComfyNode):
@classmethod
def define_schema(cls):
return IO.Schema(
node_id="OpenAIVideoSora2",
display_name="OpenAI Sora - Video",
category="api node/video/Sora",
description="OpenAI video and audio generation.",
inputs=[
IO.Combo.Input(
"model",
options=["sora-2", "sora-2-pro"],
default="sora-2",
),
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Guiding text; may be empty if an input image is present.",
),
IO.Combo.Input(
"size",
options=[
"720x1280",
"1280x720",
"1024x1792",
"1792x1024",
],
default="1280x720",
),
IO.Combo.Input(
"duration",
options=[4, 8, 12],
default=8,
),
IO.Image.Input(
"image",
optional=True,
),
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
optional=True,
tooltip="Seed to determine if node should re-run; "
"actual results are nondeterministic regardless of seed.",
),
],
outputs=[
IO.Video.Output(),
],
hidden=[
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@classmethod
async def execute(
cls,
model: str,
prompt: str,
size: str = "1280x720",
duration: int = 8,
seed: int = 0,
image: Optional[torch.Tensor] = None,
):
if model == "sora-2" and size not in ("720x1280", "1280x720"):
raise ValueError("Invalid size for sora-2 model, only 720x1280 and 1280x720 are supported.")
files_input = None
if image is not None:
if get_number_of_images(image) != 1:
raise ValueError("Currently only one input image is supported.")
files_input = {"input_reference": ("image.png", tensor_to_bytesio(image), "image/png")}
auth = {
"auth_token": cls.hidden.auth_token_comfy_org,
"comfy_api_key": cls.hidden.api_key_comfy_org,
}
payload = Sora2GenerationRequest(
model=model,
prompt=prompt,
seconds=str(duration),
size=size,
)
initial_operation = SynchronousOperation(
endpoint=ApiEndpoint(
path="/proxy/openai/v1/videos",
method=HttpMethod.POST,
request_model=Sora2GenerationRequest,
response_model=Sora2GenerationResponse
),
request=payload,
files=files_input,
auth_kwargs=auth,
content_type="multipart/form-data",
)
initial_response = await initial_operation.execute()
if initial_response.error:
raise Exception(initial_response.error.get("message", str(initial_response.error)))
model_time_multiplier = 1 if model == "sora-2" else 2
poll_operation = PollingOperation(
poll_endpoint=ApiEndpoint(
path=f"/proxy/openai/v1/videos/{initial_response.id}",
method=HttpMethod.GET,
request_model=EmptyRequest,
response_model=Sora2GenerationResponse
),
completed_statuses=["completed"],
failed_statuses=["failed"],
status_extractor=lambda x: x.status,
auth_kwargs=auth,
poll_interval=8.0,
max_poll_attempts=160,
node_id=cls.hidden.unique_id,
estimated_duration=45 * (duration / 4) * model_time_multiplier,
)
await poll_operation.execute()
return IO.NodeOutput(
await download_url_to_video_output(
f"/proxy/openai/v1/videos/{initial_response.id}/content",
auth_kwargs=auth,
)
)
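As a rough sanity check on the progress estimate used above (an inference from the formula in this diff, not documented behavior): the heuristic scales a 45-second baseline by the requested duration in 4-second units and doubles it for sora-2-pro.

    def estimated_sora_duration_sketch(model: str, duration: int) -> float:
        # Mirrors estimated_duration=45 * (duration / 4) * model_time_multiplier above.
        model_time_multiplier = 1 if model == "sora-2" else 2
        return 45 * (duration / 4) * model_time_multiplier

    # estimated_sora_duration_sketch("sora-2-pro", 12) -> 270.0 seconds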
class OpenAISoraExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
return [
OpenAIVideoSora2,
]
async def comfy_entrypoint() -> OpenAISoraExtension:
return OpenAISoraExtension()

View File

@ -2,7 +2,7 @@ from inspect import cleandoc
from typing import Optional
from typing_extensions import override
from comfy_api.latest import ComfyExtension, Input, io as comfy_io
from comfy_api.latest import ComfyExtension, Input, IO
from comfy_api_nodes.apis.stability_api import (
StabilityUpscaleConservativeRequest,
StabilityUpscaleCreativeRequest,
@ -56,20 +56,20 @@ def get_async_dummy_status(x: StabilityResultsGetResponse):
return StabilityPollStatus.in_progress
class StabilityStableImageUltraNode(comfy_io.ComfyNode):
class StabilityStableImageUltraNode(IO.ComfyNode):
"""
Generates images synchronously based on prompt and resolution.
"""
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="StabilityStableImageUltraNode",
display_name="Stability AI Stable Image Ultra",
category="api node/image/Stability AI",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
@ -80,39 +80,39 @@ class StabilityStableImageUltraNode(comfy_io.ComfyNode):
"is a value between 0 and 1. For example: `The sky was a crisp (blue:0.3) and (green:0.8)`" +
"would convey a sky that was blue and green, but more green than blue.",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"aspect_ratio",
options=[x.value for x in StabilityAspectRatio],
default=StabilityAspectRatio.ratio_1_1.value,
options=StabilityAspectRatio,
default=StabilityAspectRatio.ratio_1_1,
tooltip="Aspect ratio of generated image.",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"style_preset",
options=get_stability_style_presets(),
tooltip="Optional desired style of generated image.",
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=4294967294,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="The random seed used for creating the noise.",
),
comfy_io.Image.Input(
IO.Image.Input(
"image",
optional=True,
),
comfy_io.String.Input(
IO.String.Input(
"negative_prompt",
default="",
tooltip="A blurb of text describing what you do not wish to see in the output image. This is an advanced feature.",
force_input=True,
optional=True,
),
comfy_io.Float.Input(
IO.Float.Input(
"image_denoise",
default=0.5,
min=0.0,
@ -123,12 +123,12 @@ class StabilityStableImageUltraNode(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Image.Output(),
IO.Image.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -143,7 +143,7 @@ class StabilityStableImageUltraNode(comfy_io.ComfyNode):
image: Optional[torch.Tensor] = None,
negative_prompt: str = "",
image_denoise: Optional[float] = 0.5,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=False)
# prepare image binary if image present
image_binary = None
@ -193,44 +193,44 @@ class StabilityStableImageUltraNode(comfy_io.ComfyNode):
image_data = base64.b64decode(response_api.image)
returned_image = bytesio_to_image_tensor(BytesIO(image_data))
return comfy_io.NodeOutput(returned_image)
return IO.NodeOutput(returned_image)
class StabilityStableImageSD_3_5Node(comfy_io.ComfyNode):
class StabilityStableImageSD_3_5Node(IO.ComfyNode):
"""
Generates images synchronously based on prompt and resolution.
"""
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="StabilityStableImageSD_3_5Node",
display_name="Stability AI Stable Diffusion 3.5 Image",
category="api node/image/Stability AI",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="What you wish to see in the output image. A strong, descriptive prompt that clearly defines elements, colors, and subjects will lead to better results.",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=[x.value for x in Stability_SD3_5_Model],
options=Stability_SD3_5_Model,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"aspect_ratio",
options=[x.value for x in StabilityAspectRatio],
default=StabilityAspectRatio.ratio_1_1.value,
options=StabilityAspectRatio,
default=StabilityAspectRatio.ratio_1_1,
tooltip="Aspect ratio of generated image.",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"style_preset",
options=get_stability_style_presets(),
tooltip="Optional desired style of generated image.",
),
comfy_io.Float.Input(
IO.Float.Input(
"cfg_scale",
default=4.0,
min=1.0,
@ -238,28 +238,28 @@ class StabilityStableImageSD_3_5Node(comfy_io.ComfyNode):
step=0.1,
tooltip="How strictly the diffusion process adheres to the prompt text (higher values keep your image closer to your prompt)",
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=4294967294,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="The random seed used for creating the noise.",
),
comfy_io.Image.Input(
IO.Image.Input(
"image",
optional=True,
),
comfy_io.String.Input(
IO.String.Input(
"negative_prompt",
default="",
tooltip="Keywords of what you do not wish to see in the output image. This is an advanced feature.",
force_input=True,
optional=True,
),
comfy_io.Float.Input(
IO.Float.Input(
"image_denoise",
default=0.5,
min=0.0,
@ -270,12 +270,12 @@ class StabilityStableImageSD_3_5Node(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Image.Output(),
IO.Image.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -292,7 +292,7 @@ class StabilityStableImageSD_3_5Node(comfy_io.ComfyNode):
image: Optional[torch.Tensor] = None,
negative_prompt: str = "",
image_denoise: Optional[float] = 0.5,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=False)
# prepare image binary if image present
image_binary = None
@ -348,30 +348,30 @@ class StabilityStableImageSD_3_5Node(comfy_io.ComfyNode):
image_data = base64.b64decode(response_api.image)
returned_image = bytesio_to_image_tensor(BytesIO(image_data))
return comfy_io.NodeOutput(returned_image)
return IO.NodeOutput(returned_image)
class StabilityUpscaleConservativeNode(comfy_io.ComfyNode):
class StabilityUpscaleConservativeNode(IO.ComfyNode):
"""
Upscale image with minimal alterations to 4K resolution.
"""
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="StabilityUpscaleConservativeNode",
display_name="Stability AI Upscale Conservative",
category="api node/image/Stability AI",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.Image.Input("image"),
comfy_io.String.Input(
IO.Image.Input("image"),
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="What you wish to see in the output image. A strong, descriptive prompt that clearly defines elements, colors, and subjects will lead to better results.",
),
comfy_io.Float.Input(
IO.Float.Input(
"creativity",
default=0.35,
min=0.2,
@ -379,17 +379,17 @@ class StabilityUpscaleConservativeNode(comfy_io.ComfyNode):
step=0.01,
tooltip="Controls the likelihood of creating additional details not heavily conditioned by the init image.",
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=4294967294,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="The random seed used for creating the noise.",
),
comfy_io.String.Input(
IO.String.Input(
"negative_prompt",
default="",
tooltip="Keywords of what you do not wish to see in the output image. This is an advanced feature.",
@ -398,12 +398,12 @@ class StabilityUpscaleConservativeNode(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Image.Output(),
IO.Image.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -416,7 +416,7 @@ class StabilityUpscaleConservativeNode(comfy_io.ComfyNode):
creativity: float,
seed: int,
negative_prompt: str = "",
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=False)
image_binary = tensor_to_bytesio(image, total_pixels=1024*1024).read()
@ -457,30 +457,30 @@ class StabilityUpscaleConservativeNode(comfy_io.ComfyNode):
image_data = base64.b64decode(response_api.image)
returned_image = bytesio_to_image_tensor(BytesIO(image_data))
return comfy_io.NodeOutput(returned_image)
return IO.NodeOutput(returned_image)
class StabilityUpscaleCreativeNode(comfy_io.ComfyNode):
class StabilityUpscaleCreativeNode(IO.ComfyNode):
"""
Upscale image to 4K resolution with creative enhancement of details.
"""
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="StabilityUpscaleCreativeNode",
display_name="Stability AI Upscale Creative",
category="api node/image/Stability AI",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.Image.Input("image"),
comfy_io.String.Input(
IO.Image.Input("image"),
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="What you wish to see in the output image. A strong, descriptive prompt that clearly defines elements, colors, and subjects will lead to better results.",
),
comfy_io.Float.Input(
IO.Float.Input(
"creativity",
default=0.3,
min=0.1,
@ -488,22 +488,22 @@ class StabilityUpscaleCreativeNode(comfy_io.ComfyNode):
step=0.01,
tooltip="Controls the likelihood of creating additional details not heavily conditioned by the init image.",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"style_preset",
options=get_stability_style_presets(),
tooltip="Optional desired style of generated image.",
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=4294967294,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="The random seed used for creating the noise.",
),
comfy_io.String.Input(
IO.String.Input(
"negative_prompt",
default="",
tooltip="Keywords of what you do not wish to see in the output image. This is an advanced feature.",
@ -512,12 +512,12 @@ class StabilityUpscaleCreativeNode(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Image.Output(),
IO.Image.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -531,7 +531,7 @@ class StabilityUpscaleCreativeNode(comfy_io.ComfyNode):
style_preset: str,
seed: int,
negative_prompt: str = "",
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=False)
image_binary = tensor_to_bytesio(image, total_pixels=1024*1024).read()
@ -591,37 +591,37 @@ class StabilityUpscaleCreativeNode(comfy_io.ComfyNode):
image_data = base64.b64decode(response_poll.result)
returned_image = bytesio_to_image_tensor(BytesIO(image_data))
return comfy_io.NodeOutput(returned_image)
return IO.NodeOutput(returned_image)
class StabilityUpscaleFastNode(comfy_io.ComfyNode):
class StabilityUpscaleFastNode(IO.ComfyNode):
"""
Quickly upscales an image via Stability API call to 4x its original size; intended for upscaling low-quality/compressed images.
"""
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="StabilityUpscaleFastNode",
display_name="Stability AI Upscale Fast",
category="api node/image/Stability AI",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.Image.Input("image"),
IO.Image.Input("image"),
],
outputs=[
comfy_io.Image.Output(),
IO.Image.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@classmethod
async def execute(cls, image: torch.Tensor) -> comfy_io.NodeOutput:
async def execute(cls, image: torch.Tensor) -> IO.NodeOutput:
image_binary = tensor_to_bytesio(image, total_pixels=4096*4096).read()
files = {
@ -653,26 +653,26 @@ class StabilityUpscaleFastNode(comfy_io.ComfyNode):
image_data = base64.b64decode(response_api.image)
returned_image = bytesio_to_image_tensor(BytesIO(image_data))
return comfy_io.NodeOutput(returned_image)
return IO.NodeOutput(returned_image)
class StabilityTextToAudio(comfy_io.ComfyNode):
class StabilityTextToAudio(IO.ComfyNode):
"""Generates high-quality music and sound effects from text descriptions."""
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="StabilityTextToAudio",
display_name="Stability AI Text To Audio",
category="api node/audio/Stability AI",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=["stable-audio-2.5"],
),
comfy_io.String.Input("prompt", multiline=True, default=""),
comfy_io.Int.Input(
IO.String.Input("prompt", multiline=True, default=""),
IO.Int.Input(
"duration",
default=190,
min=1,
@ -681,18 +681,18 @@ class StabilityTextToAudio(comfy_io.ComfyNode):
tooltip="Controls the duration in seconds of the generated audio.",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=4294967294,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="The random seed used for generation.",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"steps",
default=8,
min=4,
@ -703,18 +703,18 @@ class StabilityTextToAudio(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Audio.Output(),
IO.Audio.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@classmethod
async def execute(cls, model: str, prompt: str, duration: int, seed: int, steps: int) -> comfy_io.NodeOutput:
async def execute(cls, model: str, prompt: str, duration: int, seed: int, steps: int) -> IO.NodeOutput:
validate_string(prompt, max_length=10000)
payload = StabilityTextToAudioRequest(prompt=prompt, model=model, duration=duration, seed=seed, steps=steps)
operation = SynchronousOperation(
@ -734,27 +734,27 @@ class StabilityTextToAudio(comfy_io.ComfyNode):
response_api = await operation.execute()
if not response_api.audio:
raise ValueError("No audio file was received in response.")
return comfy_io.NodeOutput(audio_bytes_to_audio_input(base64.b64decode(response_api.audio)))
return IO.NodeOutput(audio_bytes_to_audio_input(base64.b64decode(response_api.audio)))
class StabilityAudioToAudio(comfy_io.ComfyNode):
class StabilityAudioToAudio(IO.ComfyNode):
"""Transforms existing audio samples into new high-quality compositions using text instructions."""
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="StabilityAudioToAudio",
display_name="Stability AI Audio To Audio",
category="api node/audio/Stability AI",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=["stable-audio-2.5"],
),
comfy_io.String.Input("prompt", multiline=True, default=""),
comfy_io.Audio.Input("audio", tooltip="Audio must be between 6 and 190 seconds long."),
comfy_io.Int.Input(
IO.String.Input("prompt", multiline=True, default=""),
IO.Audio.Input("audio", tooltip="Audio must be between 6 and 190 seconds long."),
IO.Int.Input(
"duration",
default=190,
min=1,
@ -763,18 +763,18 @@ class StabilityAudioToAudio(comfy_io.ComfyNode):
tooltip="Controls the duration in seconds of the generated audio.",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=4294967294,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="The random seed used for generation.",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"steps",
default=8,
min=4,
@ -783,24 +783,24 @@ class StabilityAudioToAudio(comfy_io.ComfyNode):
tooltip="Controls the number of sampling steps.",
optional=True,
),
comfy_io.Float.Input(
IO.Float.Input(
"strength",
default=1,
min=0.01,
max=1.0,
step=0.01,
display_mode=comfy_io.NumberDisplay.slider,
display_mode=IO.NumberDisplay.slider,
tooltip="Parameter controls how much influence the audio parameter has on the generated audio.",
optional=True,
),
],
outputs=[
comfy_io.Audio.Output(),
IO.Audio.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -808,7 +808,7 @@ class StabilityAudioToAudio(comfy_io.ComfyNode):
@classmethod
async def execute(
cls, model: str, prompt: str, audio: Input.Audio, duration: int, seed: int, steps: int, strength: float
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, max_length=10000)
validate_audio_duration(audio, 6, 190)
payload = StabilityAudioToAudioRequest(
@ -832,27 +832,27 @@ class StabilityAudioToAudio(comfy_io.ComfyNode):
response_api = await operation.execute()
if not response_api.audio:
raise ValueError("No audio file was received in response.")
return comfy_io.NodeOutput(audio_bytes_to_audio_input(base64.b64decode(response_api.audio)))
return IO.NodeOutput(audio_bytes_to_audio_input(base64.b64decode(response_api.audio)))
class StabilityAudioInpaint(comfy_io.ComfyNode):
class StabilityAudioInpaint(IO.ComfyNode):
"""Transforms part of existing audio sample using text instructions."""
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="StabilityAudioInpaint",
display_name="Stability AI Audio Inpaint",
category="api node/audio/Stability AI",
description=cleandoc(cls.__doc__ or ""),
inputs=[
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=["stable-audio-2.5"],
),
comfy_io.String.Input("prompt", multiline=True, default=""),
comfy_io.Audio.Input("audio", tooltip="Audio must be between 6 and 190 seconds long."),
comfy_io.Int.Input(
IO.String.Input("prompt", multiline=True, default=""),
IO.Audio.Input("audio", tooltip="Audio must be between 6 and 190 seconds long."),
IO.Int.Input(
"duration",
default=190,
min=1,
@ -861,18 +861,18 @@ class StabilityAudioInpaint(comfy_io.ComfyNode):
tooltip="Controls the duration in seconds of the generated audio.",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=4294967294,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="The random seed used for generation.",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"steps",
default=8,
min=4,
@ -881,7 +881,7 @@ class StabilityAudioInpaint(comfy_io.ComfyNode):
tooltip="Controls the number of sampling steps.",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"mask_start",
default=30,
min=0,
@ -889,7 +889,7 @@ class StabilityAudioInpaint(comfy_io.ComfyNode):
step=1,
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"mask_end",
default=190,
min=0,
@ -899,12 +899,12 @@ class StabilityAudioInpaint(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Audio.Output(),
IO.Audio.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -920,7 +920,7 @@ class StabilityAudioInpaint(comfy_io.ComfyNode):
steps: int,
mask_start: int,
mask_end: int,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_string(prompt, max_length=10000)
if mask_end <= mask_start:
raise ValueError(f"Value of mask_end({mask_end}) should be greater then mask_start({mask_start})")
@ -953,12 +953,12 @@ class StabilityAudioInpaint(comfy_io.ComfyNode):
response_api = await operation.execute()
if not response_api.audio:
raise ValueError("No audio file was received in response.")
return comfy_io.NodeOutput(audio_bytes_to_audio_input(base64.b64decode(response_api.audio)))
return IO.NodeOutput(audio_bytes_to_audio_input(base64.b64decode(response_api.audio)))
class StabilityExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[comfy_io.ComfyNode]]:
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
return [
StabilityStableImageUltraNode,
StabilityStableImageSD_3_5Node,

View File

@ -6,7 +6,7 @@ from io import BytesIO
from typing import Optional
from typing_extensions import override
from comfy_api.latest import ComfyExtension, io as comfy_io
from comfy_api.latest import ComfyExtension, IO
from comfy_api.input_impl.video_types import VideoFromFile
from comfy_api_nodes.apis import (
VeoGenVidRequest,
@ -27,6 +27,13 @@ from comfy_api_nodes.apinode_utils import (
)
AVERAGE_DURATION_VIDEO_GEN = 32
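# Maps the model names exposed in the node dropdowns to the identifiers the Veo API
# expects; the 3.1 entries resolve to their "-preview" backend variants.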
MODELS_MAP = {
"veo-2.0-generate-001": "veo-2.0-generate-001",
"veo-3.1-generate": "veo-3.1-generate-preview",
"veo-3.1-fast-generate": "veo-3.1-fast-generate-preview",
"veo-3.0-generate-001": "veo-3.0-generate-001",
"veo-3.0-fast-generate-001": "veo-3.0-fast-generate-001",
}
def convert_image_to_base64(image: torch.Tensor):
if image is None:
@ -51,7 +58,7 @@ def get_video_url_from_response(poll_response: VeoGenVidPollResponse) -> Optiona
return None
class VeoVideoGenerationNode(comfy_io.ComfyNode):
class VeoVideoGenerationNode(IO.ComfyNode):
"""
Generates videos from text prompts using Google's Veo API.
@ -61,71 +68,71 @@ class VeoVideoGenerationNode(comfy_io.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="VeoVideoGenerationNode",
display_name="Google Veo 2 Video Generation",
category="api node/video/Veo",
description="Generates videos from text prompts using Google's Veo 2 API",
inputs=[
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Text description of the video",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"aspect_ratio",
options=["16:9", "9:16"],
default="16:9",
tooltip="Aspect ratio of the output video",
),
comfy_io.String.Input(
IO.String.Input(
"negative_prompt",
multiline=True,
default="",
tooltip="Negative text prompt to guide what to avoid in the video",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"duration_seconds",
default=5,
min=5,
max=8,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
tooltip="Duration of the output video in seconds",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"enhance_prompt",
default=True,
tooltip="Whether to enhance the prompt with AI assistance",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"person_generation",
options=["ALLOW", "BLOCK"],
default="ALLOW",
tooltip="Whether to allow generating people in the video",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=0xFFFFFFFF,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="Seed for video generation (0 for random)",
optional=True,
),
comfy_io.Image.Input(
IO.Image.Input(
"image",
tooltip="Optional reference image to guide video generation",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=["veo-2.0-generate-001"],
default="veo-2.0-generate-001",
@ -134,12 +141,12 @@ class VeoVideoGenerationNode(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Video.Output(),
IO.Video.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -158,6 +165,7 @@ class VeoVideoGenerationNode(comfy_io.ComfyNode):
model="veo-2.0-generate-001",
generate_audio=False,
):
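# resolve the dropdown model name to the identifier expected by the Veo API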
model = MODELS_MAP[model]
# Prepare the instances for the request
instances = []
@ -215,7 +223,7 @@ class VeoVideoGenerationNode(comfy_io.ComfyNode):
initial_response = await initial_operation.execute()
operation_name = initial_response.name
logging.info(f"Veo generation started with operation name: {operation_name}")
logging.info("Veo generation started with operation name: %s", operation_name)
# Define status extractor function
def status_extractor(response):
@ -302,7 +310,7 @@ class VeoVideoGenerationNode(comfy_io.ComfyNode):
video_io = BytesIO(video_data)
# Return VideoFromFile object
return comfy_io.NodeOutput(VideoFromFile(video_io))
return IO.NodeOutput(VideoFromFile(video_io))
class Veo3VideoGenerationNode(VeoVideoGenerationNode):
@ -319,78 +327,80 @@ class Veo3VideoGenerationNode(VeoVideoGenerationNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="Veo3VideoGenerationNode",
display_name="Google Veo 3 Video Generation",
category="api node/video/Veo",
description="Generates videos from text prompts using Google's Veo 3 API",
inputs=[
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Text description of the video",
),
comfy_io.Combo.Input(
IO.Combo.Input(
"aspect_ratio",
options=["16:9", "9:16"],
default="16:9",
tooltip="Aspect ratio of the output video",
),
comfy_io.String.Input(
IO.String.Input(
"negative_prompt",
multiline=True,
default="",
tooltip="Negative text prompt to guide what to avoid in the video",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"duration_seconds",
default=8,
min=8,
max=8,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
tooltip="Duration of the output video in seconds (Veo 3 only supports 8 seconds)",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"enhance_prompt",
default=True,
tooltip="Whether to enhance the prompt with AI assistance",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"person_generation",
options=["ALLOW", "BLOCK"],
default="ALLOW",
tooltip="Whether to allow generating people in the video",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=0xFFFFFFFF,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="Seed for video generation (0 for random)",
optional=True,
),
comfy_io.Image.Input(
IO.Image.Input(
"image",
tooltip="Optional reference image to guide video generation",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=["veo-3.0-generate-001", "veo-3.0-fast-generate-001"],
options=[
"veo-3.1-generate", "veo-3.1-fast-generate", "veo-3.0-generate-001", "veo-3.0-fast-generate-001"
],
default="veo-3.0-generate-001",
tooltip="Veo 3 model to use for video generation",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"generate_audio",
default=False,
tooltip="Generate audio for the video. Supported by all Veo 3 models.",
@ -398,12 +408,12 @@ class Veo3VideoGenerationNode(VeoVideoGenerationNode):
),
],
outputs=[
comfy_io.Video.Output(),
IO.Video.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -411,7 +421,7 @@ class Veo3VideoGenerationNode(VeoVideoGenerationNode):
class VeoExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[comfy_io.ComfyNode]]:
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
return [
VeoVideoGenerationNode,
Veo3VideoGenerationNode,

View File

@ -6,7 +6,7 @@ from typing_extensions import override
import torch
from pydantic import BaseModel, Field
from comfy_api.latest import ComfyExtension, io as comfy_io
from comfy_api.latest import ComfyExtension, IO
from comfy_api_nodes.util.validation_utils import (
validate_aspect_ratio_closeness,
validate_image_dimensions,
@ -161,77 +161,77 @@ async def execute_task(
)
class ViduTextToVideoNode(comfy_io.ComfyNode):
class ViduTextToVideoNode(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="ViduTextToVideoNode",
display_name="Vidu Text To Video Generation",
category="api node/video/Vidu",
description="Generate video from text prompt",
inputs=[
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=[model.value for model in VideoModelName],
default=VideoModelName.vidu_q1.value,
options=VideoModelName,
default=VideoModelName.vidu_q1,
tooltip="Model name",
),
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
tooltip="A textual description for video generation",
),
comfy_io.Int.Input(
IO.Int.Input(
"duration",
default=5,
min=5,
max=5,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
tooltip="Duration of the output video in seconds",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="Seed for video generation (0 for random)",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"aspect_ratio",
options=[model.value for model in AspectRatio],
default=AspectRatio.r_16_9.value,
options=AspectRatio,
default=AspectRatio.r_16_9,
tooltip="The aspect ratio of the output video",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"resolution",
options=[model.value for model in Resolution],
default=Resolution.r_1080p.value,
options=Resolution,
default=Resolution.r_1080p,
tooltip="Supported values may vary by model & duration",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"movement_amplitude",
options=[model.value for model in MovementAmplitude],
default=MovementAmplitude.auto.value,
options=MovementAmplitude,
default=MovementAmplitude.auto,
tooltip="The movement amplitude of objects in the frame",
optional=True,
),
],
outputs=[
comfy_io.Video.Output(),
IO.Video.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -246,7 +246,7 @@ class ViduTextToVideoNode(comfy_io.ComfyNode):
aspect_ratio: str,
resolution: str,
movement_amplitude: str,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
if not prompt:
raise ValueError("The prompt field is required and cannot be empty.")
payload = TaskCreationRequest(
@ -263,79 +263,79 @@ class ViduTextToVideoNode(comfy_io.ComfyNode):
"comfy_api_key": cls.hidden.api_key_comfy_org,
}
results = await execute_task(VIDU_TEXT_TO_VIDEO, auth, payload, 320, cls.hidden.unique_id)
return comfy_io.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
return IO.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
class ViduImageToVideoNode(comfy_io.ComfyNode):
class ViduImageToVideoNode(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="ViduImageToVideoNode",
display_name="Vidu Image To Video Generation",
category="api node/video/Vidu",
description="Generate video from image and optional prompt",
inputs=[
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=[model.value for model in VideoModelName],
default=VideoModelName.vidu_q1.value,
options=VideoModelName,
default=VideoModelName.vidu_q1,
tooltip="Model name",
),
comfy_io.Image.Input(
IO.Image.Input(
"image",
tooltip="An image to be used as the start frame of the generated video",
),
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="A textual description for video generation",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"duration",
default=5,
min=5,
max=5,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
tooltip="Duration of the output video in seconds",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="Seed for video generation (0 for random)",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"resolution",
options=[model.value for model in Resolution],
default=Resolution.r_1080p.value,
options=Resolution,
default=Resolution.r_1080p,
tooltip="Supported values may vary by model & duration",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"movement_amplitude",
options=[model.value for model in MovementAmplitude],
options=MovementAmplitude,
default=MovementAmplitude.auto.value,
tooltip="The movement amplitude of objects in the frame",
optional=True,
),
],
outputs=[
comfy_io.Video.Output(),
IO.Video.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -350,7 +350,7 @@ class ViduImageToVideoNode(comfy_io.ComfyNode):
seed: int,
resolution: str,
movement_amplitude: str,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
if get_number_of_images(image) > 1:
raise ValueError("Only one input image is allowed.")
validate_image_aspect_ratio_range(image, (1, 4), (4, 1))
@ -373,70 +373,70 @@ class ViduImageToVideoNode(comfy_io.ComfyNode):
auth_kwargs=auth,
)
results = await execute_task(VIDU_IMAGE_TO_VIDEO, auth, payload, 120, cls.hidden.unique_id)
return comfy_io.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
return IO.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
class ViduReferenceVideoNode(comfy_io.ComfyNode):
class ViduReferenceVideoNode(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="ViduReferenceVideoNode",
display_name="Vidu Reference To Video Generation",
category="api node/video/Vidu",
description="Generate video from multiple images and prompt",
inputs=[
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=[model.value for model in VideoModelName],
default=VideoModelName.vidu_q1.value,
options=VideoModelName,
default=VideoModelName.vidu_q1,
tooltip="Model name",
),
comfy_io.Image.Input(
IO.Image.Input(
"images",
tooltip="Images to use as references to generate a video with consistent subjects (max 7 images).",
),
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
tooltip="A textual description for video generation",
),
comfy_io.Int.Input(
IO.Int.Input(
"duration",
default=5,
min=5,
max=5,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
tooltip="Duration of the output video in seconds",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="Seed for video generation (0 for random)",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"aspect_ratio",
options=[model.value for model in AspectRatio],
default=AspectRatio.r_16_9.value,
options=AspectRatio,
default=AspectRatio.r_16_9,
tooltip="The aspect ratio of the output video",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"resolution",
options=[model.value for model in Resolution],
default=Resolution.r_1080p.value,
tooltip="Supported values may vary by model & duration",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"movement_amplitude",
options=[model.value for model in MovementAmplitude],
default=MovementAmplitude.auto.value,
@ -445,12 +445,12 @@ class ViduReferenceVideoNode(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Video.Output(),
IO.Video.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -466,7 +466,7 @@ class ViduReferenceVideoNode(comfy_io.ComfyNode):
aspect_ratio: str,
resolution: str,
movement_amplitude: str,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
if not prompt:
raise ValueError("The prompt field is required and cannot be empty.")
a = get_number_of_images(images)
@ -495,68 +495,68 @@ class ViduReferenceVideoNode(comfy_io.ComfyNode):
auth_kwargs=auth,
)
results = await execute_task(VIDU_REFERENCE_VIDEO, auth, payload, 120, cls.hidden.unique_id)
return comfy_io.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
return IO.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
class ViduStartEndToVideoNode(comfy_io.ComfyNode):
class ViduStartEndToVideoNode(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="ViduStartEndToVideoNode",
display_name="Vidu Start End To Video Generation",
category="api node/video/Vidu",
description="Generate a video from start and end frames and a prompt",
inputs=[
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=[model.value for model in VideoModelName],
default=VideoModelName.vidu_q1.value,
tooltip="Model name",
),
comfy_io.Image.Input(
IO.Image.Input(
"first_frame",
tooltip="Start frame",
),
comfy_io.Image.Input(
IO.Image.Input(
"end_frame",
tooltip="End frame",
),
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
tooltip="A textual description for video generation",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"duration",
default=5,
min=5,
max=5,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
tooltip="Duration of the output video in seconds",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="Seed for video generation (0 for random)",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"resolution",
options=[model.value for model in Resolution],
default=Resolution.r_1080p.value,
tooltip="Supported values may vary by model & duration",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"movement_amplitude",
options=[model.value for model in MovementAmplitude],
default=MovementAmplitude.auto.value,
@ -565,12 +565,12 @@ class ViduStartEndToVideoNode(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Video.Output(),
IO.Video.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -586,7 +586,7 @@ class ViduStartEndToVideoNode(comfy_io.ComfyNode):
seed: int,
resolution: str,
movement_amplitude: str,
) -> comfy_io.NodeOutput:
) -> IO.NodeOutput:
validate_aspect_ratio_closeness(first_frame, end_frame, min_rel=0.8, max_rel=1.25, strict=False)
payload = TaskCreationRequest(
model_name=model,
@ -605,12 +605,12 @@ class ViduStartEndToVideoNode(comfy_io.ComfyNode):
for frame in (first_frame, end_frame)
]
results = await execute_task(VIDU_START_END_VIDEO, auth, payload, 96, cls.hidden.unique_id)
return comfy_io.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
return IO.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
class ViduExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[comfy_io.ComfyNode]]:
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
return [
ViduTextToVideoNode,
ViduImageToVideoNode,

View File

@ -4,7 +4,7 @@ from typing_extensions import override
import torch
from pydantic import BaseModel, Field
from comfy_api.latest import ComfyExtension, Input, io as comfy_io
from comfy_api.latest import ComfyExtension, Input, IO
from comfy_api_nodes.apis.client import (
ApiEndpoint,
HttpMethod,
@ -28,6 +28,12 @@ class Text2ImageInputField(BaseModel):
negative_prompt: Optional[str] = Field(None)
class Image2ImageInputField(BaseModel):
prompt: str = Field(...)
negative_prompt: Optional[str] = Field(None)
images: list[str] = Field(..., min_length=1, max_length=2)
class Text2VideoInputField(BaseModel):
prompt: str = Field(...)
negative_prompt: Optional[str] = Field(None)
@ -49,6 +55,13 @@ class Txt2ImageParametersField(BaseModel):
watermark: bool = Field(True)
class Image2ImageParametersField(BaseModel):
size: Optional[str] = Field(None)
n: int = Field(1, description="Number of images to generate.") # we support only value=1
seed: int = Field(..., ge=0, le=2147483647)
watermark: bool = Field(True)
class Text2VideoParametersField(BaseModel):
size: str = Field(...)
seed: int = Field(..., ge=0, le=2147483647)
@ -73,6 +86,12 @@ class Text2ImageTaskCreationRequest(BaseModel):
parameters: Txt2ImageParametersField = Field(...)
class Image2ImageTaskCreationRequest(BaseModel):
model: str = Field(...)
input: Image2ImageInputField = Field(...)
parameters: Image2ImageParametersField = Field(...)
class Text2VideoTaskCreationRequest(BaseModel):
model: str = Field(...)
input: Text2VideoInputField = Field(...)
@ -135,7 +154,12 @@ async def process_task(
url: str,
request_model: Type[T],
response_model: Type[R],
payload: Union[Text2ImageTaskCreationRequest, Text2VideoTaskCreationRequest, Image2VideoTaskCreationRequest],
payload: Union[
Text2ImageTaskCreationRequest,
Image2ImageTaskCreationRequest,
Text2VideoTaskCreationRequest,
Image2VideoTaskCreationRequest,
],
node_id: str,
estimated_duration: int,
poll_interval: int,
@ -171,35 +195,35 @@ async def process_task(
).execute()
class WanTextToImageApi(comfy_io.ComfyNode):
class WanTextToImageApi(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="WanTextToImageApi",
display_name="Wan Text to Image",
category="api node/image/Wan",
description="Generates image based on text prompt.",
inputs=[
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=["wan2.5-t2i-preview"],
default="wan2.5-t2i-preview",
tooltip="Model to use.",
),
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Prompt used to describe the elements and visual features, supports English/Chinese.",
),
comfy_io.String.Input(
IO.String.Input(
"negative_prompt",
multiline=True,
default="",
tooltip="Negative text prompt to guide what to avoid.",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"width",
default=1024,
min=768,
@ -207,7 +231,7 @@ class WanTextToImageApi(comfy_io.ComfyNode):
step=32,
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"height",
default=1024,
min=768,
@ -215,24 +239,24 @@ class WanTextToImageApi(comfy_io.ComfyNode):
step=32,
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="Seed to use for generation.",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"prompt_extend",
default=True,
tooltip="Whether to enhance the prompt with AI assistance.",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"watermark",
default=True,
tooltip="Whether to add an \"AI generated\" watermark to the result.",
@ -240,12 +264,12 @@ class WanTextToImageApi(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Image.Output(),
IO.Image.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -285,38 +309,160 @@ class WanTextToImageApi(comfy_io.ComfyNode):
estimated_duration=9,
poll_interval=3,
)
return comfy_io.NodeOutput(await download_url_to_image_tensor(str(response.output.results[0].url)))
return IO.NodeOutput(await download_url_to_image_tensor(str(response.output.results[0].url)))
class WanTextToVideoApi(comfy_io.ComfyNode):
class WanImageToImageApi(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
node_id="WanTextToVideoApi",
display_name="Wan Text to Video",
category="api node/video/Wan",
description="Generates video based on text prompt.",
return IO.Schema(
node_id="WanImageToImageApi",
display_name="Wan Image to Image",
category="api node/image/Wan",
description="Generates an image from one or two input images and a text prompt. "
"The output image is currently fixed at 1.6 MP; its aspect ratio matches the input image(s).",
inputs=[
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=["wan2.5-t2v-preview"],
default="wan2.5-t2v-preview",
options=["wan2.5-i2i-preview"],
default="wan2.5-i2i-preview",
tooltip="Model to use.",
),
comfy_io.String.Input(
IO.Image.Input(
"image",
tooltip="Single-image editing or multi-image fusion, maximum 2 images.",
),
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Prompt used to describe the elements and visual features, supports English/Chinese.",
),
comfy_io.String.Input(
IO.String.Input(
"negative_prompt",
multiline=True,
default="",
tooltip="Negative text prompt to guide what to avoid.",
optional=True,
),
comfy_io.Combo.Input(
# redo this later as an optional combo of recommended resolutions
# IO.Int.Input(
# "width",
# default=1280,
# min=384,
# max=1440,
# step=16,
# optional=True,
# ),
# IO.Int.Input(
# "height",
# default=1280,
# min=384,
# max=1440,
# step=16,
# optional=True,
# ),
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="Seed to use for generation.",
optional=True,
),
IO.Boolean.Input(
"watermark",
default=True,
tooltip="Whether to add an \"AI generated\" watermark to the result.",
optional=True,
),
],
outputs=[
IO.Image.Output(),
],
hidden=[
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@classmethod
async def execute(
cls,
model: str,
image: torch.Tensor,
prompt: str,
negative_prompt: str = "",
# width: int = 1024,
# height: int = 1024,
seed: int = 0,
watermark: bool = True,
):
n_images = get_number_of_images(image)
if n_images not in (1, 2):
raise ValueError(f"Expected 1 or 2 input images, got {n_images}.")
images = []
for i in image:
images.append("data:image/png;base64," + tensor_to_base64_string(i, total_pixels=4096*4096))
payload = Image2ImageTaskCreationRequest(
model=model,
input=Image2ImageInputField(prompt=prompt, negative_prompt=negative_prompt, images=images),
parameters=Image2ImageParametersField(
# size=f"{width}*{height}",
seed=seed,
watermark=watermark,
),
)
response = await process_task(
{
"auth_token": cls.hidden.auth_token_comfy_org,
"comfy_api_key": cls.hidden.api_key_comfy_org,
},
"/proxy/wan/api/v1/services/aigc/image2image/image-synthesis",
request_model=Image2ImageTaskCreationRequest,
response_model=ImageTaskStatusResponse,
payload=payload,
node_id=cls.hidden.unique_id,
estimated_duration=42,
poll_interval=3,
)
return IO.NodeOutput(await download_url_to_image_tensor(str(response.output.results[0].url)))
class WanTextToVideoApi(IO.ComfyNode):
@classmethod
def define_schema(cls):
return IO.Schema(
node_id="WanTextToVideoApi",
display_name="Wan Text to Video",
category="api node/video/Wan",
description="Generates video based on text prompt.",
inputs=[
IO.Combo.Input(
"model",
options=["wan2.5-t2v-preview"],
default="wan2.5-t2v-preview",
tooltip="Model to use.",
),
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Prompt used to describe the elements and visual features, supports English/Chinese.",
),
IO.String.Input(
"negative_prompt",
multiline=True,
default="",
tooltip="Negative text prompt to guide what to avoid.",
optional=True,
),
IO.Combo.Input(
"size",
options=[
"480p: 1:1 (624x624)",
@ -336,45 +482,45 @@ class WanTextToVideoApi(comfy_io.ComfyNode):
default="480p: 1:1 (624x624)",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"duration",
default=5,
min=5,
max=10,
step=5,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
tooltip="Available durations: 5 and 10 seconds",
optional=True,
),
comfy_io.Audio.Input(
IO.Audio.Input(
"audio",
optional=True,
tooltip="Audio must contain a clear, loud voice, without extraneous noise, background music.",
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="Seed to use for generation.",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"generate_audio",
default=False,
optional=True,
tooltip="If there is no audio input, generate audio automatically.",
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"prompt_extend",
default=True,
tooltip="Whether to enhance the prompt with AI assistance.",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"watermark",
default=True,
tooltip="Whether to add an \"AI generated\" watermark to the result.",
@ -382,12 +528,12 @@ class WanTextToVideoApi(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Video.Output(),
IO.Video.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -436,41 +582,41 @@ class WanTextToVideoApi(comfy_io.ComfyNode):
estimated_duration=120 * int(duration / 5),
poll_interval=6,
)
return comfy_io.NodeOutput(await download_url_to_video_output(response.output.video_url))
return IO.NodeOutput(await download_url_to_video_output(response.output.video_url))
class WanImageToVideoApi(comfy_io.ComfyNode):
class WanImageToVideoApi(IO.ComfyNode):
@classmethod
def define_schema(cls):
return comfy_io.Schema(
return IO.Schema(
node_id="WanImageToVideoApi",
display_name="Wan Image to Video",
category="api node/video/Wan",
description="Generates video based on the first frame and text prompt.",
inputs=[
comfy_io.Combo.Input(
IO.Combo.Input(
"model",
options=["wan2.5-i2v-preview"],
default="wan2.5-i2v-preview",
tooltip="Model to use.",
),
comfy_io.Image.Input(
IO.Image.Input(
"image",
),
comfy_io.String.Input(
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Prompt used to describe the elements and visual features, supports English/Chinese.",
),
comfy_io.String.Input(
IO.String.Input(
"negative_prompt",
multiline=True,
default="",
tooltip="Negative text prompt to guide what to avoid.",
optional=True,
),
comfy_io.Combo.Input(
IO.Combo.Input(
"resolution",
options=[
"480P",
@ -480,45 +626,45 @@ class WanImageToVideoApi(comfy_io.ComfyNode):
default="480P",
optional=True,
),
comfy_io.Int.Input(
IO.Int.Input(
"duration",
default=5,
min=5,
max=10,
step=5,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
tooltip="Available durations: 5 and 10 seconds",
optional=True,
),
comfy_io.Audio.Input(
IO.Audio.Input(
"audio",
optional=True,
tooltip="Audio must contain a clear, loud voice, without extraneous noise, background music.",
),
comfy_io.Int.Input(
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
step=1,
display_mode=comfy_io.NumberDisplay.number,
display_mode=IO.NumberDisplay.number,
control_after_generate=True,
tooltip="Seed to use for generation.",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"generate_audio",
default=False,
optional=True,
tooltip="If there is no audio input, generate audio automatically.",
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"prompt_extend",
default=True,
tooltip="Whether to enhance the prompt with AI assistance.",
optional=True,
),
comfy_io.Boolean.Input(
IO.Boolean.Input(
"watermark",
default=True,
tooltip="Whether to add an \"AI generated\" watermark to the result.",
@ -526,12 +672,12 @@ class WanImageToVideoApi(comfy_io.ComfyNode):
),
],
outputs=[
comfy_io.Video.Output(),
IO.Video.Output(),
],
hidden=[
comfy_io.Hidden.auth_token_comfy_org,
comfy_io.Hidden.api_key_comfy_org,
comfy_io.Hidden.unique_id,
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
)
@ -585,14 +731,15 @@ class WanImageToVideoApi(comfy_io.ComfyNode):
estimated_duration=120 * int(duration / 5),
poll_interval=6,
)
return comfy_io.NodeOutput(await download_url_to_video_output(response.output.video_url))
return IO.NodeOutput(await download_url_to_video_output(response.output.video_url))
class WanApiExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[comfy_io.ComfyNode]]:
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
return [
WanTextToImageApi,
WanImageToImageApi,
WanTextToVideoApi,
WanImageToVideoApi,
]

View File

@ -6,7 +6,6 @@ from .graph_utils import is_link
from comfy.nodes_context import get_nodes
from abc import ABC, abstractmethod
NODE_CLASS_CONTAINS_UNIQUE_ID: Dict[str, bool] = {}
@ -273,6 +272,27 @@ class HierarchicalCache(BasicCache):
return await cache._ensure_subcache(node_id, children_ids)
class NullCache:
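"""A no-op cache: stores nothing, always misses, and returns itself as its own subcache."""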
async def set_prompt(self, dynprompt, node_ids, is_changed_cache):
pass
def all_node_ids(self):
return []
def clean_unused(self):
pass
def get(self, node_id):
return None
def set(self, node_id, value):
pass
async def ensure_subcache_for(self, node_id, children_ids):
return self
class LRUCache(BasicCache):
def __init__(self, key_class, max_size=100):
super().__init__(key_class)
@ -324,157 +344,3 @@ class LRUCache(BasicCache):
self._mark_used(child_id)
self.children[cache_key].append(self.cache_key_set.get_data_key(child_id))
return self
class DependencyAwareCache(BasicCache):
"""
A cache implementation that tracks dependencies between nodes and manages
their execution and caching accordingly. It extends the BasicCache class.
Nodes are removed from this cache once all of their descendants have been
executed.
"""
def __init__(self, key_class):
"""
Initialize the DependencyAwareCache.
Args:
key_class: The class used for generating cache keys.
"""
super().__init__(key_class)
self.descendants = {} # Maps node_id -> set of descendant node_ids
self.ancestors = {} # Maps node_id -> set of ancestor node_ids
self.executed_nodes = set() # Tracks nodes that have been executed
async def set_prompt(self, dynprompt, node_ids, is_changed_cache):
"""
Clear the entire cache and rebuild the dependency graph.
Args:
dynprompt: The dynamic prompt object containing node information.
node_ids: List of node IDs to initialize the cache for.
is_changed_cache: Flag indicating if the cache has changed.
"""
# Clear all existing cache data
self.cache.clear()
self.subcaches.clear()
self.descendants.clear()
self.ancestors.clear()
self.executed_nodes.clear()
# Call the parent method to initialize the cache with the new prompt
await super().set_prompt(dynprompt, node_ids, is_changed_cache)
# Rebuild the dependency graph
self._build_dependency_graph(dynprompt, node_ids)
def _build_dependency_graph(self, dynprompt, node_ids):
"""
Build the dependency graph for all nodes.
Args:
dynprompt: The dynamic prompt object containing node information.
node_ids: List of node IDs to build the graph for.
"""
self.descendants.clear()
self.ancestors.clear()
for node_id in node_ids:
self.descendants[node_id] = set()
self.ancestors[node_id] = set()
for node_id in node_ids:
inputs = dynprompt.get_node(node_id)["inputs"]
for input_data in inputs.values():
if is_link(input_data): # Check if the input is a link to another node
ancestor_id = input_data[0]
self.descendants[ancestor_id].add(node_id)
self.ancestors[node_id].add(ancestor_id)
def set(self, node_id, value):
"""
Mark a node as executed and store its value in the cache.
Args:
node_id: The ID of the node to store.
value: The value to store for the node.
"""
self._set_immediate(node_id, value)
self.executed_nodes.add(node_id)
self._cleanup_ancestors(node_id)
def get(self, node_id):
"""
Retrieve the cached value for a node.
Args:
node_id: The ID of the node to retrieve.
Returns:
The cached value for the node.
"""
return self._get_immediate(node_id)
async def ensure_subcache_for(self, node_id, children_ids):
"""
Ensure a subcache exists for a node and update dependencies.
Args:
node_id: The ID of the parent node.
children_ids: List of child node IDs to associate with the parent node.
Returns:
The subcache object for the node.
"""
subcache = await super()._ensure_subcache(node_id, children_ids)
for child_id in children_ids:
self.descendants[node_id].add(child_id)
self.ancestors[child_id].add(node_id)
return subcache
def _cleanup_ancestors(self, node_id):
"""
Check if ancestors of a node can be removed from the cache.
Args:
node_id: The ID of the node whose ancestors are to be checked.
"""
for ancestor_id in self.ancestors.get(node_id, []):
if ancestor_id in self.executed_nodes:
# Remove ancestor if all its descendants have been executed
if all(descendant in self.executed_nodes for descendant in self.descendants[ancestor_id]):
self._remove_node(ancestor_id)
def _remove_node(self, node_id):
"""
Remove a node from the cache.
Args:
node_id: The ID of the node to remove.
"""
cache_key = self.cache_key_set.get_data_key(node_id)
if cache_key in self.cache:
del self.cache[cache_key]
subcache_key = self.cache_key_set.get_subcache_key(node_id)
if subcache_key in self.subcaches:
del self.subcaches[subcache_key]
def clean_unused(self):
"""
Clean up unused nodes. This is a no-op for this cache implementation.
"""
pass
def recursive_debug_dump(self):
"""
Dump the cache and dependency graph for debugging.
Returns:
A list containing the cache state and dependency graph.
"""
result = super().recursive_debug_dump()
result.append({
"descendants": self.descendants,
"ancestors": self.ancestors,
"executed_nodes": list(self.executed_nodes),
})
return result

View File

@ -161,8 +161,9 @@ class TopologicalSort:
continue
_, _, input_info = self.get_input_info(unique_id, input_name)
is_lazy = input_info is not None and "lazy" in input_info and input_info["lazy"]
if (include_lazy or not is_lazy) and not self.is_cached(from_node_id):
node_ids.append(from_node_id)
if (include_lazy or not is_lazy):
if not self.is_cached(from_node_id):
node_ids.append(from_node_id)
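# the link is now recorded even when the upstream node is already cached,
# presumably so ExecutionList.cache_link can still snapshot the cached output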
links.append((from_node_id, from_socket, unique_id))
for link in links:
@ -206,10 +207,34 @@ class ExecutionList(TopologicalSort):
super().__init__(dynprompt)
self.output_cache = output_cache
self.staged_node_id = None
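# execution_cache keeps, per consumer node, a snapshot of each upstream output it needs;
# execution_cache_listeners maps an upstream node to the consumers holding such snapshots
# so they can be refreshed via cache_update()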
self.execution_cache = {}
self.execution_cache_listeners = {}
def is_cached(self, node_id):
return self.output_cache.get(node_id) is not None
def cache_link(self, from_node_id, to_node_id):
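# snapshot the upstream node's current cached output for this consumer and register the
# consumer so cache_update() can refresh the snapshot later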
if not to_node_id in self.execution_cache:
self.execution_cache[to_node_id] = {}
self.execution_cache[to_node_id][from_node_id] = self.output_cache.get(from_node_id)
if not from_node_id in self.execution_cache_listeners:
self.execution_cache_listeners[from_node_id] = set()
self.execution_cache_listeners[from_node_id].add(to_node_id)
def get_output_cache(self, from_node_id, to_node_id):
if not to_node_id in self.execution_cache:
return None
return self.execution_cache[to_node_id].get(from_node_id)
def cache_update(self, node_id, value):
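# propagate a freshly produced output to every consumer holding a snapshot of this node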
if node_id in self.execution_cache_listeners:
for to_node_id in self.execution_cache_listeners[node_id]:
self.execution_cache[to_node_id][node_id] = value
def add_strong_link(self, from_node_id, from_socket, to_node_id):
super().add_strong_link(from_node_id, from_socket, to_node_id)
self.cache_link(from_node_id, to_node_id)
async def stage_node_execution(self) -> tuple[Optional[str], Optional[DependencyExecutionErrorMessage], Optional[DependencyCycleError]]:
assert self.staged_node_id is None
if self.is_empty():
@ -289,6 +314,8 @@ class ExecutionList(TopologicalSort):
def complete_node_execution(self):
node_id = self.staged_node_id
self.pop_node(node_id)
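# release any input snapshots and listener entries held for the node that just finished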
self.execution_cache.pop(node_id, None)
self.execution_cache_listeners.pop(node_id, None)
self.staged_node_id = None
def get_nodes_in_cycle(self):

View File

@ -163,9 +163,10 @@ def save_audio(self, audio, filename_prefix="ComfyUI", format="flac", prompt=Non
for key, value in metadata.items():
output_container.metadata[key] = value
layout = 'mono' if waveform.shape[0] == 1 else 'stereo'
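# the channel layout is passed to each add_stream call below so the encoder's channel
# count matches the waveform (mono vs. stereo)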
# Set up the output stream with appropriate properties
if format == "opus":
out_stream = output_container.add_stream("libopus", rate=sample_rate)
out_stream = output_container.add_stream("libopus", rate=sample_rate, layout=layout)
if quality == "64k":
out_stream.bit_rate = 64000
elif quality == "96k":
@ -177,7 +178,7 @@ def save_audio(self, audio, filename_prefix="ComfyUI", format="flac", prompt=Non
elif quality == "320k":
out_stream.bit_rate = 320000
elif format == "mp3":
out_stream = output_container.add_stream("libmp3lame", rate=sample_rate)
out_stream = output_container.add_stream("libmp3lame", rate=sample_rate, layout=layout)
if quality == "V0":
# TODO i would really love to support V3 and V5 but there doesn't seem to be a way to set the qscale level, the property below is a bool
out_stream.codec_context.qscale = 1
@ -186,9 +187,9 @@ def save_audio(self, audio, filename_prefix="ComfyUI", format="flac", prompt=Non
elif quality == "320k":
out_stream.bit_rate = 320000
else: # format == "flac":
out_stream = output_container.add_stream("flac", rate=sample_rate)
out_stream = output_container.add_stream("flac", rate=sample_rate, layout=layout)
frame = av.AudioFrame.from_ndarray(waveform.movedim(0, 1).reshape(1, -1).float().numpy(), format='flt', layout='mono' if waveform.shape[0] == 1 else 'stereo')
frame = av.AudioFrame.from_ndarray(waveform.movedim(0, 1).reshape(1, -1).float().numpy(), format='flt', layout=layout)
frame.sample_rate = sample_rate
frame.pts = 0
output_container.mux(out_stream.encode(frame))
@ -398,7 +399,7 @@ class RecordAudio:
except (ImportError, ModuleNotFoundError):
raise TorchAudioNotFoundError()
waveform, sample_rate = torchaudio.load(audio_path)
waveform, sample_rate = load(audio_path)
audio = {"waveform": waveform.unsqueeze(0), "sample_rate": sample_rate}
return (audio,)

View File

@ -1,44 +1,62 @@
from comfy.cmd import folder_paths
import comfy.audio_encoders.audio_encoders
import comfy.utils
from typing_extensions import override
from comfy_api.latest import ComfyExtension, io
class AudioEncoderLoader:
class AudioEncoderLoader(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": { "audio_encoder_name": (folder_paths.get_filename_list("audio_encoders"), ),
}}
RETURN_TYPES = ("AUDIO_ENCODER",)
FUNCTION = "load_model"
def define_schema(cls) -> io.Schema:
return io.Schema(
node_id="AudioEncoderLoader",
category="loaders",
inputs=[
io.Combo.Input(
"audio_encoder_name",
options=folder_paths.get_filename_list("audio_encoders"),
),
],
outputs=[io.AudioEncoder.Output()],
)
CATEGORY = "loaders"
def load_model(self, audio_encoder_name):
@classmethod
def execute(cls, audio_encoder_name) -> io.NodeOutput:
audio_encoder_name = folder_paths.get_full_path_or_raise("audio_encoders", audio_encoder_name)
sd = comfy.utils.load_torch_file(audio_encoder_name, safe_load=True)
audio_encoder = comfy.audio_encoders.audio_encoders.load_audio_encoder_from_sd(sd)
if audio_encoder is None:
raise RuntimeError("ERROR: audio encoder file is invalid and does not contain a valid model.")
return (audio_encoder,)
return io.NodeOutput(audio_encoder)
class AudioEncoderEncode:
class AudioEncoderEncode(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": { "audio_encoder": ("AUDIO_ENCODER",),
"audio": ("AUDIO",),
}}
RETURN_TYPES = ("AUDIO_ENCODER_OUTPUT",)
FUNCTION = "encode"
def define_schema(cls) -> io.Schema:
return io.Schema(
node_id="AudioEncoderEncode",
category="conditioning",
inputs=[
io.AudioEncoder.Input("audio_encoder"),
io.Audio.Input("audio"),
],
outputs=[io.AudioEncoderOutput.Output()],
)
CATEGORY = "conditioning"
def encode(self, audio_encoder, audio):
@classmethod
def execute(cls, audio_encoder, audio) -> io.NodeOutput:
output = audio_encoder.encode_audio(audio["waveform"], audio["sample_rate"])
return (output,)
return io.NodeOutput(output)
NODE_CLASS_MAPPINGS = {
"AudioEncoderLoader": AudioEncoderLoader,
"AudioEncoderEncode": AudioEncoderEncode,
}
class AudioEncoder(ComfyExtension):
@override
async def get_node_list(self) -> list[type[io.ComfyNode]]:
return [
AudioEncoderLoader,
AudioEncoderEncode,
]
async def comfy_entrypoint() -> AudioEncoder:
return AudioEncoder()

View File

@ -1,43 +1,52 @@
from comfy.nodes.common import MAX_RESOLUTION
from typing_extensions import override
from comfy.common import MAX_RESOLUTION
from comfy_api.latest import ComfyExtension, io
class CLIPTextEncodeSDXLRefiner:
class CLIPTextEncodeSDXLRefiner(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {
"ascore": ("FLOAT", {"default": 6.0, "min": 0.0, "max": 1000.0, "step": 0.01}),
"width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}),
"height": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}),
"text": ("STRING", {"multiline": True, "dynamicPrompts": True}), "clip": ("CLIP", ),
}}
RETURN_TYPES = ("CONDITIONING",)
FUNCTION = "encode"
def define_schema(cls):
return io.Schema(
node_id="CLIPTextEncodeSDXLRefiner",
category="advanced/conditioning",
inputs=[
io.Float.Input("ascore", default=6.0, min=0.0, max=1000.0, step=0.01),
io.Int.Input("width", default=1024, min=0, max=MAX_RESOLUTION),
io.Int.Input("height", default=1024, min=0, max=MAX_RESOLUTION),
io.String.Input("text", multiline=True, dynamic_prompts=True),
io.Clip.Input("clip"),
],
outputs=[io.Conditioning.Output()],
)
CATEGORY = "advanced/conditioning"
def encode(self, clip, ascore, width, height, text):
@classmethod
def execute(cls, clip, ascore, width, height, text) -> io.NodeOutput:
tokens = clip.tokenize(text)
return (clip.encode_from_tokens_scheduled(tokens, add_dict={"aesthetic_score": ascore, "width": width, "height": height}), )
return io.NodeOutput(clip.encode_from_tokens_scheduled(tokens, add_dict={"aesthetic_score": ascore, "width": width, "height": height}))
class CLIPTextEncodeSDXL:
class CLIPTextEncodeSDXL(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {
"clip": ("CLIP", ),
"width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}),
"height": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}),
"crop_w": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION}),
"crop_h": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION}),
"target_width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}),
"target_height": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}),
"text_g": ("STRING", {"multiline": True, "default": "CLIP_G", "dynamicPrompts": True}),
"text_l": ("STRING", {"multiline": True, "default": "CLIP_L", "dynamicPrompts": True}),
}}
RETURN_TYPES = ("CONDITIONING",)
FUNCTION = "encode"
def define_schema(cls):
return io.Schema(
node_id="CLIPTextEncodeSDXL",
category="advanced/conditioning",
inputs=[
io.Clip.Input("clip"),
io.Int.Input("width", default=1024, min=0, max=MAX_RESOLUTION),
io.Int.Input("height", default=1024, min=0, max=MAX_RESOLUTION),
io.Int.Input("crop_w", default=0, min=0, max=MAX_RESOLUTION),
io.Int.Input("crop_h", default=0, min=0, max=MAX_RESOLUTION),
io.Int.Input("target_width", default=1024, min=0, max=MAX_RESOLUTION),
io.Int.Input("target_height", default=1024, min=0, max=MAX_RESOLUTION),
io.String.Input("text_g", multiline=True, dynamic_prompts=True),
io.String.Input("text_l", multiline=True, dynamic_prompts=True),
],
outputs=[io.Conditioning.Output()],
)
CATEGORY = "advanced/conditioning"
def encode(self, clip, width, height, crop_w, crop_h, target_width, target_height, text_g, text_l):
@classmethod
def execute(cls, clip, width, height, crop_w, crop_h, target_width, target_height, text_g, text_l) -> io.NodeOutput:
tokens = clip.tokenize(text_g)
tokens["l"] = clip.tokenize(text_l)["l"]
if len(tokens["l"]) != len(tokens["g"]):
@ -46,9 +55,17 @@ class CLIPTextEncodeSDXL:
tokens["l"] += empty["l"]
while len(tokens["l"]) > len(tokens["g"]):
tokens["g"] += empty["g"]
return (clip.encode_from_tokens_scheduled(tokens, add_dict={"width": width, "height": height, "crop_w": crop_w, "crop_h": crop_h, "target_width": target_width, "target_height": target_height}), )
return io.NodeOutput(clip.encode_from_tokens_scheduled(tokens, add_dict={"width": width, "height": height, "crop_w": crop_w, "crop_h": crop_h, "target_width": target_width, "target_height": target_height}))
NODE_CLASS_MAPPINGS = {
"CLIPTextEncodeSDXLRefiner": CLIPTextEncodeSDXLRefiner,
"CLIPTextEncodeSDXL": CLIPTextEncodeSDXL,
}
class ClipSdxlExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[io.ComfyNode]]:
return [
CLIPTextEncodeSDXLRefiner,
CLIPTextEncodeSDXL,
]
async def comfy_entrypoint() -> ClipSdxlExtension:
return ClipSdxlExtension()

View File

@ -162,8 +162,7 @@ class PorterDuffImageCompositeV2:
out_images.append(out_image)
out_alphas.append(out_alpha.squeeze(2))
result = (torch.stack(out_images), torch.stack(out_alphas))
return result
return io.NodeOutput(torch.stack(out_images), torch.stack(out_alphas))
class PorterDuffImageCompositeV1(PorterDuffImageCompositeV2):
@ -188,41 +187,45 @@ class PorterDuffImageCompositeV1(PorterDuffImageCompositeV2):
return super().composite(source, destination, mode, source_alpha, destination_alpha)
class SplitImageWithAlpha:
class SplitImageWithAlpha(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"image": ("IMAGE",),
}
}
def define_schema(cls):
return io.Schema(
node_id="SplitImageWithAlpha",
display_name="Split Image with Alpha",
category="mask/compositing",
inputs=[
io.Image.Input("image"),
],
outputs=[
io.Image.Output(),
io.Mask.Output(),
],
)
CATEGORY = "mask/compositing"
RETURN_TYPES = ("IMAGE", "MASK")
FUNCTION = "split_image_with_alpha"
def split_image_with_alpha(self, image: torch.Tensor):
@classmethod
def execute(cls, image: torch.Tensor) -> io.NodeOutput:
out_images = [i[:, :, :3] for i in image]
out_alphas = [i[:, :, 3] if i.shape[2] > 3 else torch.ones_like(i[:, :, 0]) for i in image]
result = (torch.stack(out_images), 1.0 - torch.stack(out_alphas))
return result
return io.NodeOutput(torch.stack(out_images), 1.0 - torch.stack(out_alphas))
class JoinImageWithAlpha:
class JoinImageWithAlpha(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"image": ("IMAGE",),
"alpha": ("MASK",),
}
}
def define_schema(cls):
return io.Schema(
node_id="JoinImageWithAlpha",
display_name="Join Image with Alpha",
category="mask/compositing",
inputs=[
io.Image.Input("image"),
io.Mask.Input("alpha"),
],
outputs=[io.Image.Output()],
)
CATEGORY = "mask/compositing"
RETURN_TYPES = ("IMAGE",)
FUNCTION = "join_image_with_alpha"
def join_image_with_alpha(self, image: torch.Tensor, alpha: torch.Tensor):
@classmethod
def execute(cls, image: torch.Tensor, alpha: torch.Tensor) -> io.NodeOutput:
batch_size = min(len(image), len(alpha))
out_images = []
@ -230,8 +233,7 @@ class JoinImageWithAlpha:
for i in range(batch_size):
out_images.append(torch.cat((image[i][:, :, :3], alpha[i].unsqueeze(2)), dim=2))
result = (torch.stack(out_images),)
return result
return io.NodeOutput(torch.stack(out_images))
class Flatten(CustomNode):

View File

@ -1,21 +1,28 @@
import comfy.utils
from comfy.cldm.control_types import UNION_CONTROLNET_TYPES
from comfy.nodes.base_nodes import ControlNetApplyAdvanced
from comfy.nodes import base_nodes as nodes
from typing_extensions import override
from comfy_api.latest import ComfyExtension, io
class SetUnionControlNetType:
class SetUnionControlNetType(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {"control_net": ("CONTROL_NET",),
"type": (["auto"] + list(UNION_CONTROLNET_TYPES.keys()),)
}}
def define_schema(cls):
return io.Schema(
node_id="SetUnionControlNetType",
category="conditioning/controlnet",
inputs=[
io.ControlNet.Input("control_net"),
io.Combo.Input("type", options=["auto"] + list(UNION_CONTROLNET_TYPES.keys())),
],
outputs=[
io.ControlNet.Output(),
],
)
CATEGORY = "conditioning/controlnet"
RETURN_TYPES = ("CONTROL_NET",)
FUNCTION = "set_controlnet_type"
def set_controlnet_type(self, control_net, type):
@classmethod
def execute(cls, control_net, type) -> io.NodeOutput:
control_net = control_net.copy()
type_number = UNION_CONTROLNET_TYPES.get(type, -1)
if type_number >= 0:
@ -23,28 +30,36 @@ class SetUnionControlNetType:
else:
control_net.set_extra_arg("control_type", [])
return (control_net,)
return io.NodeOutput(control_net)
set_controlnet_type = execute # TODO: remove
class ControlNetInpaintingAliMamaApply(ControlNetApplyAdvanced):
class ControlNetInpaintingAliMamaApply(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {"positive": ("CONDITIONING",),
"negative": ("CONDITIONING",),
"control_net": ("CONTROL_NET",),
"vae": ("VAE",),
"image": ("IMAGE",),
"mask": ("MASK",),
"strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
"start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001}),
"end_percent": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.001})
}}
def define_schema(cls):
return io.Schema(
node_id="ControlNetInpaintingAliMamaApply",
category="conditioning/controlnet",
inputs=[
io.Conditioning.Input("positive"),
io.Conditioning.Input("negative"),
io.ControlNet.Input("control_net"),
io.Vae.Input("vae"),
io.Image.Input("image"),
io.Mask.Input("mask"),
io.Float.Input("strength", default=1.0, min=0.0, max=10.0, step=0.01),
io.Float.Input("start_percent", default=0.0, min=0.0, max=1.0, step=0.001),
io.Float.Input("end_percent", default=1.0, min=0.0, max=1.0, step=0.001),
],
outputs=[
io.Conditioning.Output(display_name="positive"),
io.Conditioning.Output(display_name="negative"),
],
)
FUNCTION = "apply_inpaint_controlnet"
CATEGORY = "conditioning/controlnet"
def apply_inpaint_controlnet(self, positive, negative, control_net, vae, image, mask, strength, start_percent, end_percent):
@classmethod
def execute(cls, positive, negative, control_net, vae, image, mask, strength, start_percent, end_percent) -> io.NodeOutput:
extra_concat = []
if control_net.concat_mask:
mask = 1.0 - mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1]))
@ -52,10 +67,20 @@ class ControlNetInpaintingAliMamaApply(ControlNetApplyAdvanced):
image = image * mask_apply.movedim(1, -1).repeat(1, 1, 1, image.shape[3])
extra_concat = [mask]
return self.apply_controlnet(positive, negative, control_net, image, strength, start_percent, end_percent, vae=vae, extra_concat=extra_concat)
result = nodes.ControlNetApplyAdvanced().apply_controlnet(positive, negative, control_net, image, strength, start_percent, end_percent, vae=vae, extra_concat=extra_concat)
return io.NodeOutput(result[0], result[1])
apply_inpaint_controlnet = execute # TODO: remove
NODE_CLASS_MAPPINGS = {
"SetUnionControlNetType": SetUnionControlNetType,
"ControlNetInpaintingAliMamaApply": ControlNetInpaintingAliMamaApply,
}
class ControlNetExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[io.ComfyNode]]:
return [
SetUnionControlNetType,
ControlNetInpaintingAliMamaApply,
]
async def comfy_entrypoint() -> ControlNetExtension:
return ControlNetExtension()

View File

@ -1,34 +1,41 @@
# code adapted from https://github.com/exx8/differential-diffusion
from typing_extensions import override
import torch
from comfy_api.latest import ComfyExtension, io
class DifferentialDiffusion():
class DifferentialDiffusion(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"model": ("MODEL", ),
},
"optional": {
"strength": ("FLOAT", {
"default": 1.0,
"min": 0.0,
"max": 1.0,
"step": 0.01,
}),
}
}
RETURN_TYPES = ("MODEL",)
FUNCTION = "apply"
CATEGORY = "_for_testing"
INIT = False
def define_schema(cls):
return io.Schema(
node_id="DifferentialDiffusion",
display_name="Differential Diffusion",
category="_for_testing",
inputs=[
io.Model.Input("model"),
io.Float.Input(
"strength",
default=1.0,
min=0.0,
max=1.0,
step=0.01,
optional=True,
),
],
outputs=[io.Model.Output()],
is_experimental=True,
)
def apply(self, model, strength=1.0):
@classmethod
def execute(cls, model, strength=1.0) -> io.NodeOutput:
model = model.clone()
model.set_model_denoise_mask_function(lambda *args, **kwargs: self.forward(*args, **kwargs, strength=strength))
return (model, )
model.set_model_denoise_mask_function(lambda *args, **kwargs: cls.forward(*args, **kwargs, strength=strength))
return io.NodeOutput(model)
def forward(self, sigma: torch.Tensor, denoise_mask: torch.Tensor, extra_options: dict, strength: float):
@classmethod
def forward(cls, sigma: torch.Tensor, denoise_mask: torch.Tensor, extra_options: dict, strength: float):
model = extra_options["model"]
step_sigmas = extra_options["sigmas"]
sigma_to = model.inner_model.model_sampling.sigma_min
@ -53,9 +60,13 @@ class DifferentialDiffusion():
return binary_mask
NODE_CLASS_MAPPINGS = {
"DifferentialDiffusion": DifferentialDiffusion,
}
NODE_DISPLAY_NAME_MAPPINGS = {
"DifferentialDiffusion": "Differential Diffusion",
}
class DifferentialDiffusionExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[io.ComfyNode]]:
return [
DifferentialDiffusion,
]
async def comfy_entrypoint() -> DifferentialDiffusionExtension:
return DifferentialDiffusionExtension()

View File

@ -245,6 +245,8 @@ class EasyCacheHolder:
self.total_steps_skipped += 1
batch_offset = x.shape[0] // len(uuids)
for i, uuid in enumerate(uuids):
# slice out only what is relevant to this cond
batch_slice = [slice(i*batch_offset,(i+1)*batch_offset)]
# if cached dims don't match x dims, cut off excess and hope for the best (cosmos world2video)
if x.shape[1:] != self.uuid_cache_diffs[uuid].shape[1:]:
if not self.allow_mismatch:
@ -262,9 +264,8 @@ class EasyCacheHolder:
slicing.append(slice(None, dim_u))
else:
slicing.append(slice(None))
slicing = [slice(i*batch_offset,(i+1)*batch_offset)] + slicing
x = x[slicing]
x += self.uuid_cache_diffs[uuid].to(x.device)
batch_slice = batch_slice + slicing
x[batch_slice] += self.uuid_cache_diffs[uuid].to(x.device)
return x
def update_cache_diff(self, output: torch.Tensor, x: torch.Tensor, uuids: list[UUID]):

View File

@ -1,26 +1,38 @@
from comfy import node_helpers
from typing_extensions import override
from comfy_api.latest import ComfyExtension, io
class ReferenceLatent:
class ReferenceLatent(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {"conditioning": ("CONDITIONING", ),
},
"optional": {"latent": ("LATENT", ),}
}
def define_schema(cls):
return io.Schema(
node_id="ReferenceLatent",
category="advanced/conditioning/edit_models",
description="This node sets the guiding latent for an edit model. If the model supports it you can chain multiple to set multiple reference images.",
inputs=[
io.Conditioning.Input("conditioning"),
io.Latent.Input("latent", optional=True),
],
outputs=[
io.Conditioning.Output(),
]
)
RETURN_TYPES = ("CONDITIONING",)
FUNCTION = "append"
CATEGORY = "advanced/conditioning/edit_models"
DESCRIPTION = "This node sets the guiding latent for an edit model. If the model supports it you can chain multiple to set multiple reference images."
def append(self, conditioning, latent=None):
@classmethod
def execute(cls, conditioning, latent=None) -> io.NodeOutput:
if latent is not None:
conditioning = node_helpers.conditioning_set_values(conditioning, {"reference_latents": [latent["samples"]]}, append=True)
return (conditioning, )
return io.NodeOutput(conditioning)
NODE_CLASS_MAPPINGS = {
"ReferenceLatent": ReferenceLatent,
}
class EditModelExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[io.ComfyNode]]:
return [
ReferenceLatent,
]
async def comfy_entrypoint() -> EditModelExtension:
return EditModelExtension()

View File

@ -1,112 +1,150 @@
from comfy import node_helpers
import comfy.utils
from typing_extensions import override
from comfy_api.latest import ComfyExtension, io
from ..constants.resolutions import KONTEXT_RESOLUTIONS
class CLIPTextEncodeFlux:
class CLIPTextEncodeFlux(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {
"clip": ("CLIP",),
"clip_l": ("STRING", {"multiline": True, "dynamicPrompts": True}),
"t5xxl": ("STRING", {"multiline": True, "dynamicPrompts": True}),
"guidance": ("FLOAT", {"default": 3.5, "min": 0.0, "max": 100.0, "step": 0.1}),
}}
def define_schema(cls):
return io.Schema(
node_id="CLIPTextEncodeFlux",
category="advanced/conditioning/flux",
inputs=[
io.Clip.Input("clip"),
io.String.Input("clip_l", multiline=True, dynamic_prompts=True),
io.String.Input("t5xxl", multiline=True, dynamic_prompts=True),
io.Float.Input("guidance", default=3.5, min=0.0, max=100.0, step=0.1),
],
outputs=[
io.Conditioning.Output(),
],
)
RETURN_TYPES = ("CONDITIONING",)
FUNCTION = "encode"
CATEGORY = "advanced/conditioning/flux"
def encode(self, clip, clip_l, t5xxl, guidance):
@classmethod
def execute(cls, clip, clip_l, t5xxl, guidance) -> io.NodeOutput:
tokens = clip.tokenize(clip_l)
tokens["t5xxl"] = clip.tokenize(t5xxl)["t5xxl"]
return (clip.encode_from_tokens_scheduled(tokens, add_dict={"guidance": guidance}),)
return io.NodeOutput(clip.encode_from_tokens_scheduled(tokens, add_dict={"guidance": guidance}))
encode = execute # TODO: remove
class FluxGuidance:
class FluxGuidance(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {
"conditioning": ("CONDITIONING",),
"guidance": ("FLOAT", {"default": 3.5, "min": -100.0, "max": 100.0, "step": 0.1}),
}}
def define_schema(cls):
return io.Schema(
node_id="FluxGuidance",
category="advanced/conditioning/flux",
inputs=[
io.Conditioning.Input("conditioning"),
# guidance is allowed to be negative to support inverse sampling
io.Float.Input("guidance", default=3.5, min=-100.0, max=100.0, step=0.1),
],
outputs=[
io.Conditioning.Output(),
],
)
RETURN_TYPES = ("CONDITIONING",)
FUNCTION = "append"
CATEGORY = "advanced/conditioning/flux"
def append(self, conditioning, guidance):
@classmethod
def execute(cls, conditioning, guidance) -> io.NodeOutput:
c = node_helpers.conditioning_set_values(conditioning, {"guidance": guidance})
return (c,)
return io.NodeOutput(c)
append = execute # TODO: remove
class FluxDisableGuidance:
class FluxDisableGuidance(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {
"conditioning": ("CONDITIONING",),
}}
def define_schema(cls):
return io.Schema(
node_id="FluxDisableGuidance",
category="advanced/conditioning/flux",
description="This node completely disables the guidance embed on Flux and Flux like models",
inputs=[
io.Conditioning.Input("conditioning"),
],
outputs=[
io.Conditioning.Output(),
],
)
RETURN_TYPES = ("CONDITIONING",)
FUNCTION = "append"
CATEGORY = "advanced/conditioning/flux"
DESCRIPTION = "This node completely disables the guidance embed on Flux and Flux like models"
def append(self, conditioning):
@classmethod
def execute(cls, conditioning) -> io.NodeOutput:
c = node_helpers.conditioning_set_values(conditioning, {"guidance": None})
return (c,)
return io.NodeOutput(c)
append = execute # TODO: remove
class FluxKontextImageScale:
class FluxKontextImageScale(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {"image": ("IMAGE",),
},
}
def define_schema(cls):
return io.Schema(
node_id="FluxKontextImageScale",
category="advanced/conditioning/flux",
description="This node resizes the image to one that is more optimal for flux kontext.",
inputs=[
io.Image.Input("image"),
],
outputs=[
io.Image.Output(),
],
)
RETURN_TYPES = ("IMAGE",)
FUNCTION = "scale"
CATEGORY = "advanced/conditioning/flux"
DESCRIPTION = "This node resizes the image to one that is more optimal for flux kontext."
def scale(self, image):
@classmethod
def execute(cls, image) -> io.NodeOutput:
width = image.shape[2]
height = image.shape[1]
aspect_ratio = width / height
_, width, height = min((abs(aspect_ratio - w / h), w, h) for w, h in KONTEXT_RESOLUTIONS)
image = comfy.utils.common_upscale(image.movedim(-1, 1), width, height, "lanczos", "center").movedim(1, -1)
return (image,)
return io.NodeOutput(image)
scale = execute # TODO: remove
class FluxKontextMultiReferenceLatentMethod:
class FluxKontextMultiReferenceLatentMethod(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {
"conditioning": ("CONDITIONING", ),
"reference_latents_method": (("offset", "index", "uxo/uno"), ),
}}
def define_schema(cls):
return io.Schema(
node_id="FluxKontextMultiReferenceLatentMethod",
category="advanced/conditioning/flux",
inputs=[
io.Conditioning.Input("conditioning"),
io.Combo.Input(
"reference_latents_method",
options=["offset", "index", "uxo/uno"],
),
],
outputs=[
io.Conditioning.Output(),
],
is_experimental=True,
)
RETURN_TYPES = ("CONDITIONING",)
FUNCTION = "append"
EXPERIMENTAL = True
CATEGORY = "advanced/conditioning/flux"
def append(self, conditioning, reference_latents_method):
@classmethod
def execute(cls, conditioning, reference_latents_method) -> io.NodeOutput:
if "uxo" in reference_latents_method or "uso" in reference_latents_method:
reference_latents_method = "uxo"
c = node_helpers.conditioning_set_values(conditioning, {"reference_latents_method": reference_latents_method})
return (c, )
return io.NodeOutput(c)
NODE_CLASS_MAPPINGS = {
"CLIPTextEncodeFlux": CLIPTextEncodeFlux,
"FluxGuidance": FluxGuidance,
"FluxDisableGuidance": FluxDisableGuidance,
"FluxKontextImageScale": FluxKontextImageScale,
"FluxKontextMultiReferenceLatentMethod": FluxKontextMultiReferenceLatentMethod,
}
append = execute # TODO: remove
class FluxExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[io.ComfyNode]]:
return [
CLIPTextEncodeFlux,
FluxGuidance,
FluxDisableGuidance,
FluxKontextImageScale,
FluxKontextMultiReferenceLatentMethod,
]
async def comfy_entrypoint() -> FluxExtension:
return FluxExtension()
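
Editor's note: FluxKontextImageScale above picks the entry of KONTEXT_RESOLUTIONS whose aspect ratio is closest to the input image's. A self-contained sketch of that selection rule, using a hypothetical subset of the resolution list:

RESOLUTIONS = [(672, 1568), (1024, 1024), (1568, 672)]   # illustrative subset only

def pick_kontext_resolution(width: int, height: int) -> tuple[int, int]:
    aspect_ratio = width / height
    # Same rule as the node: smallest aspect-ratio difference wins.
    _, w, h = min((abs(aspect_ratio - w / h), w, h) for w, h in RESOLUTIONS)
    return w, h

print(pick_kontext_resolution(1920, 1080))   # (1568, 672)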

View File

@ -1,6 +1,8 @@
# Code based on https://github.com/WikiChao/FreSca (MIT License)
import torch
import torch.fft as fft
from typing_extensions import override
from comfy_api.latest import ComfyExtension, io
def Fourier_filter(x, scale_low=1.0, scale_high=1.5, freq_cutoff=20):
@ -51,25 +53,31 @@ def Fourier_filter(x, scale_low=1.0, scale_high=1.5, freq_cutoff=20):
return x_filtered
class FreSca:
class FreSca(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"model": ("MODEL",),
"scale_low": ("FLOAT", {"default": 1.0, "min": 0, "max": 10, "step": 0.01,
"tooltip": "Scaling factor for low-frequency components"}),
"scale_high": ("FLOAT", {"default": 1.25, "min": 0, "max": 10, "step": 0.01,
"tooltip": "Scaling factor for high-frequency components"}),
"freq_cutoff": ("INT", {"default": 20, "min": 1, "max": 10000, "step": 1,
"tooltip": "Number of frequency indices around center to consider as low-frequency"}),
}
}
RETURN_TYPES = ("MODEL",)
FUNCTION = "patch"
CATEGORY = "_for_testing"
DESCRIPTION = "Applies frequency-dependent scaling to the guidance"
def patch(self, model, scale_low, scale_high, freq_cutoff):
def define_schema(cls):
return io.Schema(
node_id="FreSca",
display_name="FreSca",
category="_for_testing",
description="Applies frequency-dependent scaling to the guidance",
inputs=[
io.Model.Input("model"),
io.Float.Input("scale_low", default=1.0, min=0, max=10, step=0.01,
tooltip="Scaling factor for low-frequency components"),
io.Float.Input("scale_high", default=1.25, min=0, max=10, step=0.01,
tooltip="Scaling factor for high-frequency components"),
io.Int.Input("freq_cutoff", default=20, min=1, max=10000, step=1,
tooltip="Number of frequency indices around center to consider as low-frequency"),
],
outputs=[
io.Model.Output(),
],
is_experimental=True,
)
@classmethod
def execute(cls, model, scale_low, scale_high, freq_cutoff):
def custom_cfg_function(args):
conds_out = args["conds_out"]
if len(conds_out) <= 1 or None in args["conds"][:2]:
@ -91,13 +99,16 @@ class FreSca:
m = model.clone()
m.set_model_sampler_pre_cfg_function(custom_cfg_function)
return (m,)
return io.NodeOutput(m)
NODE_CLASS_MAPPINGS = {
"FreSca": FreSca,
}
class FreScaExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[io.ComfyNode]]:
return [
FreSca,
]
NODE_DISPLAY_NAME_MAPPINGS = {
"FreSca": "FreSca",
}
async def comfy_entrypoint() -> FreScaExtension:
return FreScaExtension()
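
Editor's note: the body of Fourier_filter is elided by the diff above. The frequency split it performs amounts to scaling a centered low-frequency box by scale_low and the remaining frequencies by scale_high; a self-contained sketch of that idea (not the file's exact implementation):

import torch
import torch.fft as fft

def fourier_filter_sketch(x: torch.Tensor, scale_low=1.0, scale_high=1.5, freq_cutoff=20):
    # x: (batch, channels, height, width) guidance tensor.
    x_freq = fft.fftshift(fft.fftn(x.float(), dim=(-2, -1)), dim=(-2, -1))
    b, c, h, w = x_freq.shape
    mask = torch.full((b, c, h, w), scale_high, device=x.device)
    ch, cw = h // 2, w // 2
    # Low-frequency box around the centered spectrum gets scale_low.
    mask[..., ch - freq_cutoff:ch + freq_cutoff, cw - freq_cutoff:cw + freq_cutoff] = scale_low
    x_freq = x_freq * mask
    x_filtered = fft.ifftn(fft.ifftshift(x_freq, dim=(-2, -1)), dim=(-2, -1)).real
    return x_filtered.to(x.dtype)

guidance = torch.randn(1, 4, 64, 64)
print(fourier_filter_sketch(guidance, 1.0, 1.25, 20).shape)   # torch.Size([1, 4, 64, 64])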

View File

@ -1,6 +1,8 @@
# from https://github.com/zju-pi/diff-sampler/tree/main/gits-main
import numpy as np
import torch
from typing_extensions import override
from comfy_api.latest import ComfyExtension, io
def loglinear_interp(t_steps, num_steps):
"""
@ -333,25 +335,28 @@ NOISE_LEVELS = {
],
}
class GITSScheduler:
class GITSScheduler(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required":
{"coeff": ("FLOAT", {"default": 1.20, "min": 0.80, "max": 1.50, "step": 0.05}),
"steps": ("INT", {"default": 10, "min": 2, "max": 1000}),
"denoise": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}),
}
}
RETURN_TYPES = ("SIGMAS",)
CATEGORY = "sampling/custom_sampling/schedulers"
def define_schema(cls):
return io.Schema(
node_id="GITSScheduler",
category="sampling/custom_sampling/schedulers",
inputs=[
io.Float.Input("coeff", default=1.20, min=0.80, max=1.50, step=0.05),
io.Int.Input("steps", default=10, min=2, max=1000),
io.Float.Input("denoise", default=1.0, min=0.0, max=1.0, step=0.01),
],
outputs=[
io.Sigmas.Output(),
],
)
FUNCTION = "get_sigmas"
def get_sigmas(self, coeff, steps, denoise):
@classmethod
def execute(cls, coeff, steps, denoise):
total_steps = steps
if denoise < 1.0:
if denoise <= 0.0:
return (torch.FloatTensor([]),)
return io.NodeOutput(torch.FloatTensor([]))
total_steps = round(steps * denoise)
if steps <= 20:
@ -362,8 +367,16 @@ class GITSScheduler:
sigmas = sigmas[-(total_steps + 1):]
sigmas[-1] = 0
return (torch.FloatTensor(sigmas), )
return io.NodeOutput(torch.FloatTensor(sigmas))
NODE_CLASS_MAPPINGS = {
"GITSScheduler": GITSScheduler,
}
class GITSSchedulerExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[io.ComfyNode]]:
return [
GITSScheduler,
]
async def comfy_entrypoint() -> GITSSchedulerExtension:
return GITSSchedulerExtension()
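
Editor's note: loglinear_interp's body is elided above. The usual trick it names is interpolating a decreasing noise-level list in log space so any step count follows the same curve; a sketch under that assumption:

import numpy as np

def loglinear_interp_sketch(t_steps, num_steps):
    xs = np.linspace(0, 1, len(t_steps))
    ys = np.log(np.asarray(t_steps)[::-1])        # reverse to an increasing, positive sequence
    new_ys = np.interp(np.linspace(0, 1, num_steps), xs, ys)
    return np.exp(new_ys)[::-1].tolist()          # back to a decreasing schedule

print(loglinear_interp_sketch([14.61, 6.31, 2.83, 1.10, 0.39, 0.03], 10))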

View File

@ -1,59 +1,76 @@
import comfy.model_management
from typing_extensions import override
import comfy.sd
import comfy.model_management
from comfy_api.latest import ComfyExtension, io
from comfy.cmd import folder_paths
from comfy import model_downloader
class QuadrupleCLIPLoader:
class QuadrupleCLIPLoader(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {"clip_name1": (model_downloader.get_filename_list("text_encoders"),),
"clip_name2": (model_downloader.get_filename_list("text_encoders"),),
"clip_name3": (model_downloader.get_filename_list("text_encoders"),),
"clip_name4": (model_downloader.get_filename_list("text_encoders"),)
}}
def define_schema(cls):
return io.Schema(
node_id="QuadrupleCLIPLoader",
category="advanced/loaders",
description="[Recipes]\n\nhidream: long clip-l, long clip-g, t5xxl, llama_8b_3.1_instruct",
inputs=[
io.Combo.Input("clip_name1", options=model_downloader.get_filename_list("text_encoders")),
io.Combo.Input("clip_name2", options=model_downloader.get_filename_list("text_encoders")),
io.Combo.Input("clip_name3", options=model_downloader.get_filename_list("text_encoders")),
io.Combo.Input("clip_name4", options=model_downloader.get_filename_list("text_encoders")),
],
outputs=[
io.Clip.Output(),
]
)
RETURN_TYPES = ("CLIP",)
FUNCTION = "load_clip"
CATEGORY = "advanced/loaders"
DESCRIPTION = "[Recipes]\n\nhidream: long clip-l, long clip-g, t5xxl, llama_8b_3.1_instruct"
def load_clip(self, clip_name1, clip_name2, clip_name3, clip_name4):
@classmethod
def execute(cls, clip_name1, clip_name2, clip_name3, clip_name4):
clip_path1 = model_downloader.get_full_path_or_raise("text_encoders", clip_name1)
clip_path2 = model_downloader.get_full_path_or_raise("text_encoders", clip_name2)
clip_path3 = model_downloader.get_full_path_or_raise("text_encoders", clip_name3)
clip_path4 = model_downloader.get_full_path_or_raise("text_encoders", clip_name4)
clip = comfy.sd.load_clip(ckpt_paths=[clip_path1, clip_path2, clip_path3, clip_path4], embedding_directory=folder_paths.get_folder_paths("embeddings"))
return (clip,)
return io.NodeOutput(clip)
class CLIPTextEncodeHiDream:
class CLIPTextEncodeHiDream(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {
"clip": ("CLIP",),
"clip_l": ("STRING", {"multiline": True, "dynamicPrompts": True}),
"clip_g": ("STRING", {"multiline": True, "dynamicPrompts": True}),
"t5xxl": ("STRING", {"multiline": True, "dynamicPrompts": True}),
"llama": ("STRING", {"multiline": True, "dynamicPrompts": True})
}}
def define_schema(cls):
return io.Schema(
node_id="CLIPTextEncodeHiDream",
category="advanced/conditioning",
inputs=[
io.Clip.Input("clip"),
io.String.Input("clip_l", multiline=True, dynamic_prompts=True),
io.String.Input("clip_g", multiline=True, dynamic_prompts=True),
io.String.Input("t5xxl", multiline=True, dynamic_prompts=True),
io.String.Input("llama", multiline=True, dynamic_prompts=True),
],
RETURN_TYPES = ("CONDITIONING",)
FUNCTION = "encode"
outputs=[
io.Conditioning.Output(),
]
)
CATEGORY = "advanced/conditioning"
def encode(self, clip, clip_l, clip_g, t5xxl, llama):
@classmethod
def execute(cls, clip, clip_l, clip_g, t5xxl, llama):
tokens = clip.tokenize(clip_g)
tokens["l"] = clip.tokenize(clip_l)["l"]
tokens["t5xxl"] = clip.tokenize(t5xxl)["t5xxl"]
tokens["llama"] = clip.tokenize(llama)["llama"]
return (clip.encode_from_tokens_scheduled(tokens),)
return io.NodeOutput(clip.encode_from_tokens_scheduled(tokens))
NODE_CLASS_MAPPINGS = {
"QuadrupleCLIPLoader": QuadrupleCLIPLoader,
"CLIPTextEncodeHiDream": CLIPTextEncodeHiDream,
}
class HiDreamExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[io.ComfyNode]]:
return [
QuadrupleCLIPLoader,
CLIPTextEncodeHiDream,
]
async def comfy_entrypoint() -> HiDreamExtension:
return HiDreamExtension()

View File

@ -1,44 +1,64 @@
import torch
import comfy.model_management
from typing_extensions import override
from comfy_api.latest import ComfyExtension, io
from comfy.nodes.common import MAX_RESOLUTION
from comfy.nodes import base_nodes as nodes
from comfy import node_helpers
class CLIPTextEncodeHunyuanDiT:
class CLIPTextEncodeHunyuanDiT(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {
"clip": ("CLIP", ),
"bert": ("STRING", {"multiline": True, "dynamicPrompts": True}),
"mt5xl": ("STRING", {"multiline": True, "dynamicPrompts": True}),
}}
RETURN_TYPES = ("CONDITIONING",)
FUNCTION = "encode"
def define_schema(cls):
return io.Schema(
node_id="CLIPTextEncodeHunyuanDiT",
category="advanced/conditioning",
inputs=[
io.Clip.Input("clip"),
io.String.Input("bert", multiline=True, dynamic_prompts=True),
io.String.Input("mt5xl", multiline=True, dynamic_prompts=True),
],
outputs=[
io.Conditioning.Output(),
],
)
CATEGORY = "advanced/conditioning"
def encode(self, clip, bert, mt5xl):
@classmethod
def execute(cls, clip, bert, mt5xl) -> io.NodeOutput:
tokens = clip.tokenize(bert)
tokens["mt5xl"] = clip.tokenize(mt5xl)["mt5xl"]
return (clip.encode_from_tokens_scheduled(tokens), )
return io.NodeOutput(clip.encode_from_tokens_scheduled(tokens))
class EmptyHunyuanLatentVideo:
encode = execute # TODO: remove
class EmptyHunyuanLatentVideo(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": { "width": ("INT", {"default": 848, "min": 16, "max": MAX_RESOLUTION, "step": 16}),
"height": ("INT", {"default": 480, "min": 16, "max": MAX_RESOLUTION, "step": 16}),
"length": ("INT", {"default": 25, "min": 1, "max": MAX_RESOLUTION, "step": 4}),
"batch_size": ("INT", {"default": 1, "min": 1, "max": 4096})}}
RETURN_TYPES = ("LATENT",)
FUNCTION = "generate"
def define_schema(cls):
return io.Schema(
node_id="EmptyHunyuanLatentVideo",
category="latent/video",
inputs=[
io.Int.Input("width", default=848, min=16, max=MAX_RESOLUTION, step=16),
io.Int.Input("height", default=480, min=16, max=MAX_RESOLUTION, step=16),
io.Int.Input("length", default=25, min=1, max=MAX_RESOLUTION, step=4),
io.Int.Input("batch_size", default=1, min=1, max=4096),
],
outputs=[
io.Latent.Output(),
],
)
CATEGORY = "latent/video"
def generate(self, width, height, length, batch_size=1):
@classmethod
def execute(cls, width, height, length, batch_size=1) -> io.NodeOutput:
latent = torch.zeros([batch_size, 16, ((length - 1) // 4) + 1, height // 8, width // 8], device=comfy.model_management.intermediate_device())
return ({"samples":latent}, )
return io.NodeOutput({"samples": latent})
generate = execute # TODO: remove
PROMPT_TEMPLATE_ENCODE_VIDEO_I2V = (
"<|start_header_id|>system<|end_header_id|>\n\n<image>\nDescribe the video by detailing the following aspects according to the reference image: "
@ -51,45 +71,62 @@ PROMPT_TEMPLATE_ENCODE_VIDEO_I2V = (
"<|start_header_id|>assistant<|end_header_id|>\n\n"
)
class TextEncodeHunyuanVideo_ImageToVideo:
class TextEncodeHunyuanVideo_ImageToVideo(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {
"clip": ("CLIP", ),
"clip_vision_output": ("CLIP_VISION_OUTPUT", ),
"prompt": ("STRING", {"multiline": True, "dynamicPrompts": True}),
"image_interleave": ("INT", {"default": 2, "min": 1, "max": 512, "tooltip": "How much the image influences things vs the text prompt. Higher number means more influence from the text prompt."}),
}}
RETURN_TYPES = ("CONDITIONING",)
FUNCTION = "encode"
def define_schema(cls):
return io.Schema(
node_id="TextEncodeHunyuanVideo_ImageToVideo",
category="advanced/conditioning",
inputs=[
io.Clip.Input("clip"),
io.ClipVisionOutput.Input("clip_vision_output"),
io.String.Input("prompt", multiline=True, dynamic_prompts=True),
io.Int.Input(
"image_interleave",
default=2,
min=1,
max=512,
tooltip="How much the image influences things vs the text prompt. Higher number means more influence from the text prompt.",
),
],
outputs=[
io.Conditioning.Output(),
],
)
CATEGORY = "advanced/conditioning"
def encode(self, clip, clip_vision_output, prompt, image_interleave):
@classmethod
def execute(cls, clip, clip_vision_output, prompt, image_interleave) -> io.NodeOutput:
tokens = clip.tokenize(prompt, llama_template=PROMPT_TEMPLATE_ENCODE_VIDEO_I2V, image_embeds=clip_vision_output.mm_projected, image_interleave=image_interleave)
return (clip.encode_from_tokens_scheduled(tokens), )
return io.NodeOutput(clip.encode_from_tokens_scheduled(tokens))
class HunyuanImageToVideo:
encode = execute # TODO: remove
class HunyuanImageToVideo(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {"positive": ("CONDITIONING", ),
"vae": ("VAE", ),
"width": ("INT", {"default": 848, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}),
"height": ("INT", {"default": 480, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}),
"length": ("INT", {"default": 53, "min": 1, "max": nodes.MAX_RESOLUTION, "step": 4}),
"batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
"guidance_type": (["v1 (concat)", "v2 (replace)", "custom"], )
},
"optional": {"start_image": ("IMAGE", ),
}}
def define_schema(cls):
return io.Schema(
node_id="HunyuanImageToVideo",
category="conditioning/video_models",
inputs=[
io.Conditioning.Input("positive"),
io.Vae.Input("vae"),
io.Int.Input("width", default=848, min=16, max=nodes.MAX_RESOLUTION, step=16),
io.Int.Input("height", default=480, min=16, max=nodes.MAX_RESOLUTION, step=16),
io.Int.Input("length", default=53, min=1, max=nodes.MAX_RESOLUTION, step=4),
io.Int.Input("batch_size", default=1, min=1, max=4096),
io.Combo.Input("guidance_type", options=["v1 (concat)", "v2 (replace)", "custom"]),
io.Image.Input("start_image", optional=True),
],
outputs=[
io.Conditioning.Output(display_name="positive"),
io.Latent.Output(display_name="latent"),
],
)
RETURN_TYPES = ("CONDITIONING", "LATENT")
RETURN_NAMES = ("positive", "latent")
FUNCTION = "encode"
CATEGORY = "conditioning/video_models"
def encode(self, positive, vae, width, height, length, batch_size, guidance_type, start_image=None):
@classmethod
def execute(cls, positive, vae, width, height, length, batch_size, guidance_type, start_image=None) -> io.NodeOutput:
latent = torch.zeros([batch_size, 16, ((length - 1) // 4) + 1, height // 8, width // 8], device=comfy.model_management.intermediate_device())
out_latent = {}
@ -114,51 +151,76 @@ class HunyuanImageToVideo:
positive = node_helpers.conditioning_set_values(positive, cond)
out_latent["samples"] = latent
return (positive, out_latent)
return io.NodeOutput(positive, out_latent)
class EmptyHunyuanImageLatent:
encode = execute # TODO: remove
class EmptyHunyuanImageLatent(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": { "width": ("INT", {"default": 2048, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}),
"height": ("INT", {"default": 2048, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}),
"batch_size": ("INT", {"default": 1, "min": 1, "max": 4096})}}
RETURN_TYPES = ("LATENT",)
FUNCTION = "generate"
def define_schema(cls):
return io.Schema(
node_id="EmptyHunyuanImageLatent",
category="latent",
inputs=[
io.Int.Input("width", default=2048, min=64, max=nodes.MAX_RESOLUTION, step=32),
io.Int.Input("height", default=2048, min=64, max=nodes.MAX_RESOLUTION, step=32),
io.Int.Input("batch_size", default=1, min=1, max=4096),
],
outputs=[
io.Latent.Output(),
],
)
CATEGORY = "latent"
def generate(self, width, height, batch_size=1):
@classmethod
def execute(cls, width, height, batch_size=1) -> io.NodeOutput:
latent = torch.zeros([batch_size, 64, height // 32, width // 32], device=comfy.model_management.intermediate_device())
return ({"samples":latent}, )
return io.NodeOutput({"samples": latent})
class HunyuanRefinerLatent:
generate = execute # TODO: remove
class HunyuanRefinerLatent(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {"positive": ("CONDITIONING", ),
"negative": ("CONDITIONING", ),
"latent": ("LATENT", ),
"noise_augmentation": ("FLOAT", {"default": 0.10, "min": 0.0, "max": 1.0, "step": 0.01}),
}}
def define_schema(cls):
return io.Schema(
node_id="HunyuanRefinerLatent",
inputs=[
io.Conditioning.Input("positive"),
io.Conditioning.Input("negative"),
io.Latent.Input("latent"),
io.Float.Input("noise_augmentation", default=0.10, min=0.0, max=1.0, step=0.01),
RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
RETURN_NAMES = ("positive", "negative", "latent")
],
outputs=[
io.Conditioning.Output(display_name="positive"),
io.Conditioning.Output(display_name="negative"),
io.Latent.Output(display_name="latent"),
],
)
FUNCTION = "execute"
def execute(self, positive, negative, latent, noise_augmentation):
@classmethod
def execute(cls, positive, negative, latent, noise_augmentation) -> io.NodeOutput:
latent = latent["samples"]
positive = node_helpers.conditioning_set_values(positive, {"concat_latent_image": latent, "noise_augmentation": noise_augmentation})
negative = node_helpers.conditioning_set_values(negative, {"concat_latent_image": latent, "noise_augmentation": noise_augmentation})
out_latent = {}
out_latent["samples"] = torch.zeros([latent.shape[0], 32, latent.shape[-3], latent.shape[-2], latent.shape[-1]], device=comfy.model_management.intermediate_device())
return (positive, negative, out_latent)
return io.NodeOutput(positive, negative, out_latent)
NODE_CLASS_MAPPINGS = {
"CLIPTextEncodeHunyuanDiT": CLIPTextEncodeHunyuanDiT,
"TextEncodeHunyuanVideo_ImageToVideo": TextEncodeHunyuanVideo_ImageToVideo,
"EmptyHunyuanLatentVideo": EmptyHunyuanLatentVideo,
"HunyuanImageToVideo": HunyuanImageToVideo,
"EmptyHunyuanImageLatent": EmptyHunyuanImageLatent,
"HunyuanRefinerLatent": HunyuanRefinerLatent,
}
class HunyuanExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[io.ComfyNode]]:
return [
CLIPTextEncodeHunyuanDiT,
TextEncodeHunyuanVideo_ImageToVideo,
EmptyHunyuanLatentVideo,
HunyuanImageToVideo,
EmptyHunyuanImageLatent,
HunyuanRefinerLatent,
]
async def comfy_entrypoint() -> HunyuanExtension:
return HunyuanExtension()
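
Editor's note: the empty-latent nodes above derive the latent shape directly from the pixel dimensions. For EmptyHunyuanLatentVideo's defaults the arithmetic works out as follows:

import torch

width, height, length, batch_size = 848, 480, 25, 1
latent = torch.zeros([batch_size, 16, ((length - 1) // 4) + 1, height // 8, width // 8])
print(latent.shape)   # torch.Size([1, 16, 7, 60, 106])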

View File

@ -1,9 +1,12 @@
#Taken from: https://github.com/tfernd/HyperTile/
# Taken from: https://github.com/tfernd/HyperTile/
import math
from typing_extensions import override
from einops import rearrange
# Use torch rng for consistency across generations
from torch import randint # pylint: disable=no-name-in-module
from comfy_api.latest import ComfyExtension, io
def random_divisor(value: int, min_value: int, /, max_options: int = 1) -> int:
min_value = min(min_value, value)
@ -20,25 +23,32 @@ def random_divisor(value: int, min_value: int, /, max_options: int = 1) -> int:
return ns[idx]
class HyperTile:
class HyperTile(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": { "model": ("MODEL",),
"tile_size": ("INT", {"default": 256, "min": 1, "max": 2048}),
"swap_size": ("INT", {"default": 2, "min": 1, "max": 128}),
"max_depth": ("INT", {"default": 0, "min": 0, "max": 10}),
"scale_depth": ("BOOLEAN", {"default": False}),
}}
RETURN_TYPES = ("MODEL",)
FUNCTION = "patch"
def define_schema(cls):
return io.Schema(
node_id="HyperTile",
category="model_patches/unet",
inputs=[
io.Model.Input("model"),
io.Int.Input("tile_size", default=256, min=1, max=2048),
io.Int.Input("swap_size", default=2, min=1, max=128),
io.Int.Input("max_depth", default=0, min=0, max=10),
io.Boolean.Input("scale_depth", default=False),
],
outputs=[
io.Model.Output(),
],
)
CATEGORY = "model_patches/unet"
def patch(self, model, tile_size, swap_size, max_depth, scale_depth):
@classmethod
def execute(cls, model, tile_size, swap_size, max_depth, scale_depth) -> io.NodeOutput:
latent_tile_size = max(32, tile_size) // 8
self.temp = None
temp = None
def hypertile_in(q, k, v, extra_options):
nonlocal temp
model_chans = q.shape[-2]
orig_shape = extra_options['original_shape']
apply_to = []
@ -58,24 +68,33 @@ class HyperTile:
if nh * nw > 1:
q = rearrange(q, "b (nh h nw w) c -> (b nh nw) (h w) c", h=h // nh, w=w // nw, nh=nh, nw=nw)
self.temp = (nh, nw, h, w)
temp = (nh, nw, h, w)
return q, k, v
return q, k, v
def hypertile_out(out, extra_options):
if self.temp is not None:
nh, nw, h, w = self.temp
self.temp = None
nonlocal temp
if temp is not None:
nh, nw, h, w = temp
temp = None
out = rearrange(out, "(b nh nw) hw c -> b nh nw hw c", nh=nh, nw=nw)
out = rearrange(out, "b nh nw (h w) c -> b (nh h nw w) c", h=h // nh, w=w // nw)
return out
m = model.clone()
m.set_model_attn1_patch(hypertile_in)
m.set_model_attn1_output_patch(hypertile_out)
return (m, )
        return io.NodeOutput(m)
NODE_CLASS_MAPPINGS = {
"HyperTile": HyperTile,
}
class HyperTileExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[io.ComfyNode]]:
return [
HyperTile,
]
async def comfy_entrypoint() -> HyperTileExtension:
return HyperTileExtension()
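
Editor's note: the hypertile_in/hypertile_out pair above relies on two einops rearranges that split the flattened attention sequence into spatial tiles and later merge them back. A round-trip demonstration with hypothetical sizes:

import torch
from einops import rearrange

b, h, w, c = 2, 32, 48, 64
nh, nw = 2, 3                     # tiles along height and width
q = torch.randn(b, h * w, c)

# Split into nh*nw spatial tiles, as hypertile_in does before attention...
tiled = rearrange(q, "b (nh h nw w) c -> (b nh nw) (h w) c", h=h // nh, w=w // nw, nh=nh, nw=nw)
# ...and merge them back afterwards, as hypertile_out does.
merged = rearrange(tiled, "(b nh nw) hw c -> b nh nw hw c", nh=nh, nw=nw)
merged = rearrange(merged, "b nh nw (h w) c -> b (nh h nw w) c", h=h // nh, w=w // nw)
assert torch.equal(merged, q)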

View File

@ -1,21 +1,30 @@
import torch
class InstructPixToPixConditioning:
from typing_extensions import override
from comfy_api.latest import ComfyExtension, io
class InstructPixToPixConditioning(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {"positive": ("CONDITIONING", ),
"negative": ("CONDITIONING", ),
"vae": ("VAE", ),
"pixels": ("IMAGE", ),
}}
def define_schema(cls):
return io.Schema(
node_id="InstructPixToPixConditioning",
category="conditioning/instructpix2pix",
inputs=[
io.Conditioning.Input("positive"),
io.Conditioning.Input("negative"),
io.Vae.Input("vae"),
io.Image.Input("pixels"),
],
outputs=[
io.Conditioning.Output(display_name="positive"),
io.Conditioning.Output(display_name="negative"),
io.Latent.Output(display_name="latent"),
],
)
RETURN_TYPES = ("CONDITIONING","CONDITIONING","LATENT")
RETURN_NAMES = ("positive", "negative", "latent")
FUNCTION = "encode"
CATEGORY = "conditioning/instructpix2pix"
def encode(self, positive, negative, pixels, vae):
@classmethod
def execute(cls, positive, negative, pixels, vae) -> io.NodeOutput:
x = (pixels.shape[1] // 8) * 8
y = (pixels.shape[2] // 8) * 8
@ -38,8 +47,17 @@ class InstructPixToPixConditioning:
n = [t[0], d]
c.append(n)
out.append(c)
return (out[0], out[1], out_latent)
return io.NodeOutput(out[0], out[1], out_latent)
class InstructPix2PixExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[io.ComfyNode]]:
return [
InstructPixToPixConditioning,
]
async def comfy_entrypoint() -> InstructPix2PixExtension:
return InstructPix2PixExtension()
NODE_CLASS_MAPPINGS = {
"InstructPixToPixConditioning": InstructPixToPixConditioning,
}

View File

@ -1,11 +1,12 @@
import torch
from comfy.nodes import base_nodes as nodes
import comfy.utils
from comfy.component_model.tensor_types import Latent
from comfy.nodes.package_typing import Seed, Seed64
from .nodes_post_processing import gaussian_kernel
from typing_extensions import override
from comfy_api.latest import ComfyExtension, io
def reshape_latent_to(target_shape, latent, repeat_batch=True):
@ -17,17 +18,23 @@ def reshape_latent_to(target_shape, latent, repeat_batch=True):
return latent
class LatentAdd:
class LatentAdd(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {"samples1": ("LATENT",), "samples2": ("LATENT",)}}
def define_schema(cls):
return io.Schema(
node_id="LatentAdd",
category="latent/advanced",
inputs=[
io.Latent.Input("samples1"),
io.Latent.Input("samples2"),
],
outputs=[
io.Latent.Output(),
],
)
RETURN_TYPES = ("LATENT",)
FUNCTION = "op"
CATEGORY = "latent/advanced"
def op(self, samples1, samples2):
@classmethod
def execute(cls, samples1, samples2) -> io.NodeOutput:
samples_out = samples1.copy()
s1 = samples1["samples"]
@ -35,20 +42,26 @@ class LatentAdd:
s2 = reshape_latent_to(s1.shape, s2)
samples_out["samples"] = s1 + s2
return (samples_out,)
return io.NodeOutput(samples_out)
class LatentSubtract:
class LatentSubtract(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {"samples1": ("LATENT",), "samples2": ("LATENT",)}}
def define_schema(cls):
return io.Schema(
node_id="LatentSubtract",
category="latent/advanced",
inputs=[
io.Latent.Input("samples1"),
io.Latent.Input("samples2"),
],
outputs=[
io.Latent.Output(),
],
)
RETURN_TYPES = ("LATENT",)
FUNCTION = "op"
CATEGORY = "latent/advanced"
def op(self, samples1, samples2):
@classmethod
def execute(cls, samples1, samples2) -> io.NodeOutput:
samples_out = samples1.copy()
s1 = samples1["samples"]
@ -56,43 +69,51 @@ class LatentSubtract:
s2 = reshape_latent_to(s1.shape, s2)
samples_out["samples"] = s1 - s2
return (samples_out,)
return io.NodeOutput(samples_out)
class LatentMultiply:
class LatentMultiply(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {"samples": ("LATENT",),
"multiplier": ("FLOAT", {"default": 1.0, "min": -10.0, "max": 10.0, "step": 0.01}),
}}
def define_schema(cls):
return io.Schema(
node_id="LatentMultiply",
category="latent/advanced",
inputs=[
io.Latent.Input("samples"),
io.Float.Input("multiplier", default=1.0, min=-10.0, max=10.0, step=0.01),
],
outputs=[
io.Latent.Output(),
],
)
RETURN_TYPES = ("LATENT",)
FUNCTION = "op"
CATEGORY = "latent/advanced"
def op(self, samples, multiplier):
@classmethod
def execute(cls, samples, multiplier) -> io.NodeOutput:
samples_out = samples.copy()
s1 = samples["samples"]
samples_out["samples"] = s1 * multiplier
return (samples_out,)
return io.NodeOutput(samples_out)
class LatentInterpolate:
class LatentInterpolate(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {"samples1": ("LATENT",),
"samples2": ("LATENT",),
"ratio": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}),
}}
def define_schema(cls):
return io.Schema(
node_id="LatentInterpolate",
category="latent/advanced",
inputs=[
io.Latent.Input("samples1"),
io.Latent.Input("samples2"),
io.Float.Input("ratio", default=1.0, min=0.0, max=1.0, step=0.01),
],
outputs=[
io.Latent.Output(),
],
)
RETURN_TYPES = ("LATENT",)
FUNCTION = "op"
CATEGORY = "latent/advanced"
def op(self, samples1, samples2, ratio):
@classmethod
def execute(cls, samples1, samples2, ratio) -> io.NodeOutput:
samples_out = samples1.copy()
s1 = samples1["samples"]
@ -111,19 +132,27 @@ class LatentInterpolate:
st = torch.nan_to_num(t / mt)
samples_out["samples"] = st * (m1 * ratio + m2 * (1.0 - ratio))
return (samples_out,)
return io.NodeOutput(samples_out)
class LatentConcat:
class LatentConcat(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": { "samples1": ("LATENT",), "samples2": ("LATENT",), "dim": (["x", "-x", "y", "-y", "t", "-t"], )}}
def define_schema(cls):
return io.Schema(
node_id="LatentConcat",
category="latent/advanced",
inputs=[
io.Latent.Input("samples1"),
io.Latent.Input("samples2"),
io.Combo.Input("dim", options=["x", "-x", "y", "-y", "t", "-t"]),
],
outputs=[
io.Latent.Output(),
],
)
RETURN_TYPES = ("LATENT",)
FUNCTION = "op"
CATEGORY = "latent/advanced"
def op(self, samples1, samples2, dim):
@classmethod
def execute(cls, samples1, samples2, dim) -> io.NodeOutput:
samples_out = samples1.copy()
s1 = samples1["samples"]
@ -143,22 +172,28 @@ class LatentConcat:
dim = -3
samples_out["samples"] = torch.cat(c, dim=dim)
return (samples_out,)
return io.NodeOutput(samples_out)
class LatentCut:
class LatentCut(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {"samples": ("LATENT",),
"dim": (["x", "y", "t"], ),
"index": ("INT", {"default": 0, "min": -nodes.MAX_RESOLUTION, "max": nodes.MAX_RESOLUTION, "step": 1}),
"amount": ("INT", {"default": 1, "min": 1, "max": nodes.MAX_RESOLUTION, "step": 1})}}
def define_schema(cls):
return io.Schema(
node_id="LatentCut",
category="latent/advanced",
inputs=[
io.Latent.Input("samples"),
io.Combo.Input("dim", options=["x", "y", "t"]),
io.Int.Input("index", default=0, min=-nodes.MAX_RESOLUTION, max=nodes.MAX_RESOLUTION, step=1),
io.Int.Input("amount", default=1, min=1, max=nodes.MAX_RESOLUTION, step=1),
],
outputs=[
io.Latent.Output(),
],
)
RETURN_TYPES = ("LATENT",)
FUNCTION = "op"
CATEGORY = "latent/advanced"
def op(self, samples, dim, index, amount):
@classmethod
def execute(cls, samples, dim, index, amount) -> io.NodeOutput:
samples_out = samples.copy()
s1 = samples["samples"]
@ -178,20 +213,26 @@ class LatentCut:
amount = min(-index, amount)
samples_out["samples"] = torch.narrow(s1, dim, index, amount)
return (samples_out,)
return io.NodeOutput(samples_out)
class LatentBatch:
class LatentBatch(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {"samples1": ("LATENT",), "samples2": ("LATENT",)}}
def define_schema(cls):
return io.Schema(
node_id="LatentBatch",
category="latent/batch",
inputs=[
io.Latent.Input("samples1"),
io.Latent.Input("samples2"),
],
outputs=[
io.Latent.Output(),
],
)
RETURN_TYPES = ("LATENT",)
FUNCTION = "batch"
CATEGORY = "latent/batch"
def batch(self, samples1, samples2):
@classmethod
def execute(cls, samples1, samples2) -> io.NodeOutput:
samples_out = samples1.copy()
s1 = samples1["samples"]
s2 = samples2["samples"]
@ -200,21 +241,26 @@ class LatentBatch:
s = torch.cat((s1, s2), dim=0)
samples_out["samples"] = s
samples_out["batch_index"] = samples1.get("batch_index", [x for x in range(0, s1.shape[0])]) + samples2.get("batch_index", [x for x in range(0, s2.shape[0])])
return (samples_out,)
return io.NodeOutput(samples_out)
class LatentBatchSeedBehavior:
class LatentBatchSeedBehavior(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {"samples": ("LATENT",),
"seed_behavior": (["random", "fixed"], {"default": "fixed"}), }}
def define_schema(cls):
return io.Schema(
node_id="LatentBatchSeedBehavior",
category="latent/advanced",
inputs=[
io.Latent.Input("samples"),
io.Combo.Input("seed_behavior", options=["random", "fixed"], default="fixed"),
],
outputs=[
io.Latent.Output(),
],
)
RETURN_TYPES = ("LATENT",)
FUNCTION = "op"
CATEGORY = "latent/advanced"
def op(self, samples, seed_behavior):
@classmethod
def execute(cls, samples, seed_behavior) -> io.NodeOutput:
samples_out = samples.copy()
latent = samples["samples"]
if seed_behavior == "random":
@ -224,31 +270,29 @@ class LatentBatchSeedBehavior:
batch_number = samples_out.get("batch_index", [0])[0]
samples_out["batch_index"] = [batch_number] * latent.shape[0]
return (samples_out,)
return io.NodeOutput(samples_out)
class LatentAddNoiseChannels:
def __init__(self):
pass
class LatentAddNoiseChannels(io.ComfyNode):
@classmethod
def define_schema(cls):
return io.Schema(
node_id="LatentAddNoiseChannels",
category="latent/advanced",
inputs=[
io.Latent.Input("samples"),
io.Float.Input("std_dev", default=0.1, min=0.0, max=1.0, step=0.01),
io.Int.Input("seed", min=0, max=0xfffffffffffffff, step=1, control_after_generate=True),
io.Int.Input("slice_i", default=0, min=-16, max=16),
io.Int.Input("slice_j", default=16, min=-16, max=16),
],
outputs=[
io.Latent.Output(),
],
)
@classmethod
def INPUT_TYPES(cls):
return {
"required": {
"samples": ("LATENT",),
"std_dev": ("FLOAT", {"default": 0.1, "min": 0.0, "max": 1.0, "step": 0.01}),
"seed": Seed64,
"slice_i": ("INT", {"default": 0, "min": -16, "max": 16}),
"slice_j": ("INT", {"default": 16, "min": -16, "max": 16}),
}
}
RETURN_TYPES = ("LATENT",)
FUNCTION = "inject_noise"
CATEGORY = "latent/advanced"
def inject_noise(self, samples: Latent, std_dev, seed: int, slice_i: int, slice_j: int):
def execute(cls, samples: Latent, std_dev, seed: int, slice_i: int, slice_j: int):
s = samples.copy()
latent = samples["samples"]
@ -261,44 +305,52 @@ class LatentAddNoiseChannels:
noised_latent = latent.clone()
noised_latent[:, slice_i:slice_j, :, :] += noise
s["samples"] = noised_latent
return (s,)
return io.NodeOutput(s)
class LatentApplyOperation:
class LatentApplyOperation(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {"samples": ("LATENT",),
"operation": ("LATENT_OPERATION",),
}}
def define_schema(cls):
return io.Schema(
node_id="LatentApplyOperation",
category="latent/advanced/operations",
is_experimental=True,
inputs=[
io.Latent.Input("samples"),
io.LatentOperation.Input("operation"),
],
outputs=[
io.Latent.Output(),
],
)
RETURN_TYPES = ("LATENT",)
FUNCTION = "op"
CATEGORY = "latent/advanced/operations"
EXPERIMENTAL = True
def op(self, samples, operation):
@classmethod
def execute(cls, samples, operation) -> io.NodeOutput:
samples_out = samples.copy()
s1 = samples["samples"]
samples_out["samples"] = operation(latent=s1)
return (samples_out,)
return io.NodeOutput(samples_out)
class LatentApplyOperationCFG:
class LatentApplyOperationCFG(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {"model": ("MODEL",),
"operation": ("LATENT_OPERATION",),
}}
def define_schema(cls):
return io.Schema(
node_id="LatentApplyOperationCFG",
category="latent/advanced/operations",
is_experimental=True,
inputs=[
io.Model.Input("model"),
io.LatentOperation.Input("operation"),
],
outputs=[
io.Model.Output(),
],
)
RETURN_TYPES = ("MODEL",)
FUNCTION = "patch"
CATEGORY = "latent/advanced/operations"
EXPERIMENTAL = True
def patch(self, model, operation):
@classmethod
def execute(cls, model, operation) -> io.NodeOutput:
m = model.clone()
def pre_cfg_function(args):
@ -310,22 +362,26 @@ class LatentApplyOperationCFG:
return conds_out
m.set_model_sampler_pre_cfg_function(pre_cfg_function)
return (m,)
return io.NodeOutput(m)
class LatentOperationTonemapReinhard:
class LatentOperationTonemapReinhard(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {"multiplier": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step": 0.01}),
}}
def define_schema(cls):
return io.Schema(
node_id="LatentOperationTonemapReinhard",
category="latent/advanced/operations",
is_experimental=True,
inputs=[
io.Float.Input("multiplier", default=1.0, min=0.0, max=100.0, step=0.01),
],
outputs=[
io.LatentOperation.Output(),
],
)
RETURN_TYPES = ("LATENT_OPERATION",)
FUNCTION = "op"
CATEGORY = "latent/advanced/operations"
EXPERIMENTAL = True
def op(self, multiplier):
@classmethod
def execute(cls, multiplier) -> io.NodeOutput:
def tonemap_reinhard(latent, **kwargs):
latent_vector_magnitude = (torch.linalg.vector_norm(latent, dim=(1)) + 0.0000000001)[:, None]
normalized_latent = latent / latent_vector_magnitude
@ -342,40 +398,39 @@ class LatentOperationTonemapReinhard:
return normalized_latent * new_magnitude
return (tonemap_reinhard,)
return io.NodeOutput(tonemap_reinhard)
class LatentOperationSharpen:
class LatentOperationSharpen(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {
"sharpen_radius": ("INT", {
"default": 9,
"min": 1,
"max": 31,
"step": 1
}),
"sigma": ("FLOAT", {
"default": 1.0,
"min": 0.1,
"max": 10.0,
"step": 0.1
}),
"alpha": ("FLOAT", {
"default": 0.1,
"min": 0.0,
"max": 5.0,
"step": 0.01
}),
}}
def define_schema(cls):
return io.Schema(
node_id="LatentOperationSharpen",
category="latent/advanced/operations",
is_experimental=True,
inputs=[
io.Int.Input("sharpen_radius", default=9,
min=1,
max=31,
step=1),
io.Float.Input(
"sigma", default=1.0,
min=0.1,
max=10.0,
step=0.1),
io.Float.Input(
"alpha", default=0.1,
min=0.0,
max=5.0,
step=0.01),
],
outputs=[
io.LatentOperation.Output(),
],
)
RETURN_TYPES = ("LATENT_OPERATION",)
FUNCTION = "op"
CATEGORY = "latent/advanced/operations"
EXPERIMENTAL = True
def op(self, sharpen_radius, sigma, alpha):
@classmethod
def execute(cls, sharpen_radius, sigma, alpha) -> io.NodeOutput:
def sharpen(latent, **kwargs):
luminance = (torch.linalg.vector_norm(latent, dim=(1)) + 1e-6)[:, None]
normalized_latent = latent / luminance
@ -393,21 +448,28 @@ class LatentOperationSharpen:
return luminance * sharpened
return (sharpen,)
return io.NodeOutput(sharpen)
NODE_CLASS_MAPPINGS = {
"LatentAdd": LatentAdd,
"LatentSubtract": LatentSubtract,
"LatentMultiply": LatentMultiply,
"LatentInterpolate": LatentInterpolate,
"LatentConcat": LatentConcat,
"LatentCut": LatentCut,
"LatentBatch": LatentBatch,
"LatentBatchSeedBehavior": LatentBatchSeedBehavior,
"LatentAddNoiseChannels": LatentAddNoiseChannels,
"LatentApplyOperation": LatentApplyOperation,
"LatentApplyOperationCFG": LatentApplyOperationCFG,
"LatentOperationTonemapReinhard": LatentOperationTonemapReinhard,
"LatentOperationSharpen": LatentOperationSharpen,
}
class LatentExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[io.ComfyNode]]:
return [
LatentAdd,
LatentSubtract,
LatentMultiply,
LatentInterpolate,
LatentConcat,
LatentCut,
LatentBatch,
LatentBatchSeedBehavior,
LatentAddNoiseChannels,
LatentApplyOperation,
LatentApplyOperationCFG,
LatentOperationTonemapReinhard,
LatentOperationSharpen,
]
async def comfy_entrypoint() -> LatentExtension:
return LatentExtension()
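
Editor's note: the LatentInterpolate hunk above is only partially visible. The idea it implements is to blend directions and magnitudes separately so the mix keeps a sensible norm; a sketch of that idea, not the node's exact code:

import torch

def interpolate_latents_sketch(s1: torch.Tensor, s2: torch.Tensor, ratio: float) -> torch.Tensor:
    m1 = torch.linalg.vector_norm(s1, dim=1, keepdim=True)
    m2 = torch.linalg.vector_norm(s2, dim=1, keepdim=True)
    d1 = torch.nan_to_num(s1 / m1)
    d2 = torch.nan_to_num(s2 / m2)
    t = d1 * ratio + d2 * (1.0 - ratio)
    st = torch.nan_to_num(t / torch.linalg.vector_norm(t, dim=1, keepdim=True))
    return st * (m1 * ratio + m2 * (1.0 - ratio))

a, b = torch.randn(1, 4, 64, 64), torch.randn(1, 4, 64, 64)
print(interpolate_latents_sketch(a, b, 0.5).shape)   # torch.Size([1, 4, 64, 64])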

View File

@ -7,9 +7,13 @@ import torch
import comfy.model_management
import comfy.utils
from comfy.cmd import folder_paths
from typing_extensions import override
from comfy_api.latest import ComfyExtension, io
CLAMP_QUANTILE = 0.99
logger = logging.getLogger(__name__)
def extract_lora(diff, rank):
conv2d = (len(diff.shape) == 4)
@ -68,7 +72,7 @@ def calc_lora_model(model_diff, rank, prefix_model, prefix_lora, output_sd, lora
output_sd["{}{}.lora_up.weight".format(prefix_lora, k[len(prefix_model):-7])] = out[0].contiguous().half().cpu()
output_sd["{}{}.lora_down.weight".format(prefix_lora, k[len(prefix_model):-7])] = out[1].contiguous().half().cpu()
except:
logging.warning("Could not generate lora weights for key {}, is the weight difference a zero?".format(k))
logger.warning("Could not generate lora weights for key {}, is the weight difference a zero?".format(k))
elif lora_type == LORAType.FULL_DIFF:
output_sd["{}{}.diff".format(prefix_lora, k[len(prefix_model):-7])] = weight_diff.contiguous().half().cpu()
@ -77,32 +81,40 @@ def calc_lora_model(model_diff, rank, prefix_model, prefix_lora, output_sd, lora
return output_sd
class LoraSave:
def __init__(self):
self.output_dir = folder_paths.get_output_directory()
class LoraSave(io.ComfyNode):
@classmethod
def define_schema(cls):
return io.Schema(
node_id="LoraSave",
display_name="Extract and Save Lora",
category="_for_testing",
inputs=[
io.String.Input("filename_prefix", default="loras/ComfyUI_extracted_lora"),
io.Int.Input("rank", default=8, min=1, max=4096, step=1),
io.Combo.Input("lora_type", options=tuple(LORA_TYPES.keys())),
io.Boolean.Input("bias_diff", default=True),
io.Model.Input(
"model_diff",
tooltip="The ModelSubtract output to be converted to a lora.",
optional=True,
),
io.Clip.Input(
"text_encoder_diff",
tooltip="The CLIPSubtract output to be converted to a lora.",
optional=True,
),
],
is_experimental=True,
is_output_node=True,
)
@classmethod
def INPUT_TYPES(s):
return {"required": {"filename_prefix": ("STRING", {"default": "loras/ComfyUI_extracted_lora"}),
"rank": ("INT", {"default": 8, "min": 1, "max": 4096, "step": 1}),
"lora_type": (tuple(LORA_TYPES.keys()),),
"bias_diff": ("BOOLEAN", {"default": True}),
},
"optional": {"model_diff": ("MODEL", {"tooltip": "The ModelSubtract output to be converted to a lora."}),
"text_encoder_diff": ("CLIP", {"tooltip": "The CLIPSubtract output to be converted to a lora."})},
}
RETURN_TYPES = ()
FUNCTION = "save"
OUTPUT_NODE = True
CATEGORY = "_for_testing"
def save(self, filename_prefix, rank, lora_type, bias_diff, model_diff=None, text_encoder_diff=None):
def execute(cls, filename_prefix, rank, lora_type, bias_diff, model_diff=None, text_encoder_diff=None) -> io.NodeOutput:
if model_diff is None and text_encoder_diff is None:
return {}
return io.NodeOutput()
lora_type = LORA_TYPES.get(lora_type)
full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path(filename_prefix, self.output_dir)
full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path(filename_prefix, folder_paths.get_output_directory())
output_sd = {}
if model_diff is not None:
@ -114,13 +126,16 @@ class LoraSave:
output_checkpoint = os.path.join(full_output_folder, output_checkpoint)
comfy.utils.save_torch_file(output_sd, output_checkpoint, metadata=None)
return {}
return io.NodeOutput()
NODE_CLASS_MAPPINGS = {
"LoraSave": LoraSave
}
class LoraSaveExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[io.ComfyNode]]:
return [
LoraSave,
]
NODE_DISPLAY_NAME_MAPPINGS = {
"LoraSave": "Extract and Save Lora"
}
async def comfy_entrypoint() -> LoraSaveExtension:
return LoraSaveExtension()
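
Editor's note: extract_lora's body is elided above. The standard approach it names is a rank-limited SVD of the weight difference with outlier clamping (CLAMP_QUANTILE); a sketch for a plain Linear weight, omitting the conv2d branch the real function also handles:

import torch

CLAMP_QUANTILE = 0.99

def extract_lora_sketch(diff: torch.Tensor, rank: int):
    U, S, Vh = torch.linalg.svd(diff.float())
    up = U[:, :rank] * S[:rank]           # fold singular values into lora_up
    down = Vh[:rank, :]                   # lora_down
    # Clamp outliers so the half-precision factors stay in range.
    hi = torch.quantile(torch.cat([up.flatten(), down.flatten()]).abs(), CLAMP_QUANTILE)
    return up.clamp(-hi, hi), down.clamp(-hi, hi)

up, down = extract_lora_sketch(torch.randn(320, 768), rank=8)
print(up.shape, down.shape, (up @ down).shape)   # (320, 8) (8, 768) (320, 768)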

View File

@ -1,20 +1,22 @@
from typing_extensions import override
import torch
import comfy.model_management as mm
from comfy_api.latest import ComfyExtension, io
class LotusConditioning:
class LotusConditioning(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {
"required": {
},
}
def define_schema(cls):
return io.Schema(
node_id="LotusConditioning",
category="conditioning/lotus",
inputs=[],
outputs=[io.Conditioning.Output(display_name="conditioning")],
)
RETURN_TYPES = ("CONDITIONING",)
RETURN_NAMES = ("conditioning",)
FUNCTION = "conditioning"
CATEGORY = "conditioning/lotus"
def conditioning(self):
@classmethod
def execute(cls) -> io.NodeOutput:
device = mm.get_torch_device()
        # Lotus uses a frozen encoder and null conditioning; I'm just inlining the results of that operation since it doesn't change,
        # and getting parity with the reference implementation would otherwise require inference and 800MB of tensors.
@ -22,8 +24,16 @@ class LotusConditioning:
cond = [[prompt_embeds, {}]]
return (cond,)
return io.NodeOutput(cond)
NODE_CLASS_MAPPINGS = {
"LotusConditioning" : LotusConditioning,
}
class LotusExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[io.ComfyNode]]:
return [
LotusConditioning,
]
async def comfy_entrypoint() -> LotusExtension:
return LotusExtension()

View File

@ -1,4 +1,3 @@
import io
import math
import av
@ -11,48 +10,58 @@ import comfy.utils
from comfy import node_helpers
from comfy.ldm.lightricks.symmetric_patchifier import SymmetricPatchifier, latent_to_pixel_coords
from comfy.nodes import base_nodes as nodes
from comfy_api.latest import ComfyExtension, io
class EmptyLTXVLatentVideo:
class EmptyLTXVLatentVideo(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {"width": ("INT", {"default": 768, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}),
"height": ("INT", {"default": 512, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}),
"length": ("INT", {"default": 97, "min": 1, "max": nodes.MAX_RESOLUTION, "step": 8}),
"batch_size": ("INT", {"default": 1, "min": 1, "max": 4096})}}
def define_schema(cls):
return io.Schema(
node_id="EmptyLTXVLatentVideo",
category="latent/video/ltxv",
inputs=[
io.Int.Input("width", default=768, min=64, max=nodes.MAX_RESOLUTION, step=32),
io.Int.Input("height", default=512, min=64, max=nodes.MAX_RESOLUTION, step=32),
io.Int.Input("length", default=97, min=1, max=nodes.MAX_RESOLUTION, step=8),
io.Int.Input("batch_size", default=1, min=1, max=4096),
],
outputs=[
io.Latent.Output(),
],
)
RETURN_TYPES = ("LATENT",)
FUNCTION = "generate"
CATEGORY = "latent/video/ltxv"
def generate(self, width, height, length, batch_size=1):
@classmethod
def execute(cls, width, height, length, batch_size=1) -> io.NodeOutput:
latent = torch.zeros([batch_size, 128, ((length - 1) // 8) + 1, height // 32, width // 32], device=comfy.model_management.intermediate_device())
return ({"samples": latent},)
return io.NodeOutput({"samples": latent})
generate = execute # TODO: remove
class LTXVImgToVideo:
class LTXVImgToVideo(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {"positive": ("CONDITIONING",),
"negative": ("CONDITIONING",),
"vae": ("VAE",),
"image": ("IMAGE",),
"width": ("INT", {"default": 768, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}),
"height": ("INT", {"default": 512, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}),
"length": ("INT", {"default": 97, "min": 9, "max": nodes.MAX_RESOLUTION, "step": 8}),
"batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
}, "optional": {
"strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0}),
}}
def define_schema(cls):
return io.Schema(
node_id="LTXVImgToVideo",
category="conditioning/video_models",
inputs=[
io.Conditioning.Input("positive"),
io.Conditioning.Input("negative"),
io.Vae.Input("vae"),
io.Image.Input("image"),
io.Int.Input("width", default=768, min=64, max=nodes.MAX_RESOLUTION, step=32),
io.Int.Input("height", default=512, min=64, max=nodes.MAX_RESOLUTION, step=32),
io.Int.Input("length", default=97, min=9, max=nodes.MAX_RESOLUTION, step=8),
io.Int.Input("batch_size", default=1, min=1, max=4096),
io.Float.Input("strength", optional=True, default=1.0, min=0.0, max=1.0),
],
outputs=[
io.Conditioning.Output(display_name="positive"),
io.Conditioning.Output(display_name="negative"),
io.Latent.Output(display_name="latent"),
],
)
RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
RETURN_NAMES = ("positive", "negative", "latent")
CATEGORY = "conditioning/video_models"
FUNCTION = "generate"
def generate(self, positive, negative, image, vae, width, height, length, batch_size, strength=1.0):
@classmethod
    def execute(cls, positive, negative, image, vae, width, height, length, batch_size, strength=1.0) -> io.NodeOutput:
pixels = comfy.utils.common_upscale(image.movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1)
encode_pixels = pixels[:, :, :, :3]
t = vae.encode(encode_pixels)
@ -67,7 +76,7 @@ class LTXVImgToVideo:
)
conditioning_latent_frames_mask[:, :, :t.shape[2]] = 1.0 - strength
return (positive, negative, {"samples": latent, "noise_mask": conditioning_latent_frames_mask},)
        return io.NodeOutput(positive, negative, {"samples": latent, "noise_mask": conditioning_latent_frames_mask})

    generate = execute # TODO: remove
def conditioning_get_any_value(conditioning, key, default=None):
@ -100,35 +109,45 @@ def get_keyframe_idxs(cond):
return keyframe_idxs, num_keyframes
class LTXVAddGuide:
class LTXVAddGuide(io.ComfyNode):
NUM_PREFIX_FRAMES = 2
PATCHIFIER = SymmetricPatchifier(1)
@classmethod
def INPUT_TYPES(s):
return {"required": {"positive": ("CONDITIONING",),
"negative": ("CONDITIONING",),
"vae": ("VAE",),
"latent": ("LATENT",),
"image": ("IMAGE", {"tooltip": "Image or video to condition the latent video on. Must be 8*n + 1 frames."
"If the video is not 8*n + 1 frames, it will be cropped to the nearest 8*n + 1 frames."}),
"frame_idx": ("INT", {"default": 0, "min": -9999, "max": 9999,
"tooltip": "Frame index to start the conditioning at. For single-frame images or "
"videos with 1-8 frames, any frame_idx value is acceptable. For videos with 9+ "
"frames, frame_idx must be divisible by 8, otherwise it will be rounded down to "
"the nearest multiple of 8. Negative values are counted from the end of the video."}),
"strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}),
}
}
RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
RETURN_NAMES = ("positive", "negative", "latent")
CATEGORY = "conditioning/video_models"
FUNCTION = "generate"
def __init__(self):
self._num_prefix_frames = 2
self._patchifier = SymmetricPatchifier(1)
def encode(self, vae, latent_width, latent_height, images, scale_factors):
def define_schema(cls):
return io.Schema(
node_id="LTXVAddGuide",
category="conditioning/video_models",
inputs=[
io.Conditioning.Input("positive"),
io.Conditioning.Input("negative"),
io.Vae.Input("vae"),
io.Latent.Input("latent"),
io.Image.Input(
"image",
tooltip="Image or video to condition the latent video on. Must be 8*n + 1 frames. "
"If the video is not 8*n + 1 frames, it will be cropped to the nearest 8*n + 1 frames.",
),
io.Int.Input(
"frame_idx",
default=0,
min=-9999,
max=9999,
tooltip="Frame index to start the conditioning at. "
"For single-frame images or videos with 1-8 frames, any frame_idx value is acceptable. "
"For videos with 9+ frames, frame_idx must be divisible by 8, otherwise it will be rounded "
"down to the nearest multiple of 8. Negative values are counted from the end of the video.",
),
io.Float.Input("strength", default=1.0, min=0.0, max=1.0, step=0.01),
],
outputs=[
io.Conditioning.Output(display_name="positive"),
io.Conditioning.Output(display_name="negative"),
io.Latent.Output(display_name="latent"),
],
)
@classmethod
def encode(cls, vae, latent_width, latent_height, images, scale_factors):
time_scale_factor, width_scale_factor, height_scale_factor = scale_factors
images = images[:(images.shape[0] - 1) // time_scale_factor * time_scale_factor + 1]
pixels = comfy.utils.common_upscale(images.movedim(-1, 1), latent_width * width_scale_factor, latent_height * height_scale_factor, "bilinear", crop="disabled").movedim(1, -1)
@ -136,7 +155,8 @@ class LTXVAddGuide:
t = vae.encode(encode_pixels)
return encode_pixels, t
def get_latent_index(self, cond, latent_length, guide_length, frame_idx, scale_factors):
@classmethod
def get_latent_index(cls, cond, latent_length, guide_length, frame_idx, scale_factors):
time_scale_factor, _, _ = scale_factors
_, num_keyframes = get_keyframe_idxs(cond)
latent_count = latent_length - num_keyframes
@ -148,9 +168,10 @@ class LTXVAddGuide:
return frame_idx, latent_idx
def add_keyframe_index(self, cond, frame_idx, guiding_latent, scale_factors):
@classmethod
def add_keyframe_index(cls, cond, frame_idx, guiding_latent, scale_factors):
keyframe_idxs, _ = get_keyframe_idxs(cond)
_, latent_coords = self._patchifier.patchify(guiding_latent)
_, latent_coords = cls.PATCHIFIER.patchify(guiding_latent)
pixel_coords = latent_to_pixel_coords(latent_coords, scale_factors, causal_fix=frame_idx == 0) # we need the causal fix only if we're placing the new latents at index 0
pixel_coords[:, 0] += frame_idx
if keyframe_idxs is None:
@ -159,8 +180,9 @@ class LTXVAddGuide:
keyframe_idxs = torch.cat([keyframe_idxs, pixel_coords], dim=2)
return node_helpers.conditioning_set_values(cond, {"keyframe_idxs": keyframe_idxs})
def append_keyframe(self, positive, negative, frame_idx, latent_image, noise_mask, guiding_latent, strength, scale_factors):
_, latent_idx = self.get_latent_index(
@classmethod
def append_keyframe(cls, positive, negative, frame_idx, latent_image, noise_mask, guiding_latent, strength, scale_factors):
_, latent_idx = cls.get_latent_index(
cond=positive,
latent_length=latent_image.shape[2],
guide_length=guiding_latent.shape[2],
@ -169,8 +191,8 @@ class LTXVAddGuide:
)
noise_mask[:, :, latent_idx:latent_idx + guiding_latent.shape[2]] = 1.0
positive = self.add_keyframe_index(positive, frame_idx, guiding_latent, scale_factors)
negative = self.add_keyframe_index(negative, frame_idx, guiding_latent, scale_factors)
positive = cls.add_keyframe_index(positive, frame_idx, guiding_latent, scale_factors)
negative = cls.add_keyframe_index(negative, frame_idx, guiding_latent, scale_factors)
mask = torch.full(
(noise_mask.shape[0], 1, guiding_latent.shape[2], noise_mask.shape[3], noise_mask.shape[4]),
@ -183,7 +205,8 @@ class LTXVAddGuide:
noise_mask = torch.cat([noise_mask, mask], dim=2)
return positive, negative, latent_image, noise_mask
def replace_latent_frames(self, latent_image, noise_mask, guiding_latent, latent_idx, strength):
@classmethod
def replace_latent_frames(cls, latent_image, noise_mask, guiding_latent, latent_idx, strength):
cond_length = guiding_latent.shape[2]
assert latent_image.shape[2] >= latent_idx + cond_length, "Conditioning frames exceed the length of the latent sequence."
@ -202,20 +225,21 @@ class LTXVAddGuide:
return latent_image, noise_mask
def generate(self, positive, negative, vae, latent, image, frame_idx, strength):
@classmethod
def execute(cls, positive, negative, vae, latent, image, frame_idx, strength) -> io.NodeOutput:
scale_factors = vae.downscale_index_formula
latent_image = latent["samples"]
noise_mask = get_noise_mask(latent)
_, _, latent_length, latent_height, latent_width = latent_image.shape
image, t = self.encode(vae, latent_width, latent_height, image, scale_factors)
image, t = cls.encode(vae, latent_width, latent_height, image, scale_factors)
frame_idx, latent_idx = self.get_latent_index(positive, latent_length, len(image), frame_idx, scale_factors)
frame_idx, latent_idx = cls.get_latent_index(positive, latent_length, len(image), frame_idx, scale_factors)
assert latent_idx + t.shape[2] <= latent_length, "Conditioning frames exceed the length of the latent sequence."
num_prefix_frames = min(self._num_prefix_frames, t.shape[2])
num_prefix_frames = min(cls.NUM_PREFIX_FRAMES, t.shape[2])
positive, negative, latent_image, noise_mask = self.append_keyframe(
positive, negative, latent_image, noise_mask = cls.append_keyframe(
positive,
negative,
frame_idx,
@ -230,9 +254,9 @@ class LTXVAddGuide:
t = t[:, :, num_prefix_frames:]
if t.shape[2] == 0:
return (positive, negative, {"samples": latent_image, "noise_mask": noise_mask},)
return io.NodeOutput(positive, negative, {"samples": latent_image, "noise_mask": noise_mask})
latent_image, noise_mask = self.replace_latent_frames(
latent_image, noise_mask = cls.replace_latent_frames(
latent_image,
noise_mask,
t,
@ -240,34 +264,37 @@ class LTXVAddGuide:
strength,
)
return (positive, negative, {"samples": latent_image, "noise_mask": noise_mask},)
return io.NodeOutput(positive, negative, {"samples": latent_image, "noise_mask": noise_mask})
generate = execute # TODO: remove
class LTXVCropGuides:
class LTXVCropGuides(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {"positive": ("CONDITIONING",),
"negative": ("CONDITIONING",),
"latent": ("LATENT",),
}
}
def define_schema(cls):
return io.Schema(
node_id="LTXVCropGuides",
category="conditioning/video_models",
inputs=[
io.Conditioning.Input("positive"),
io.Conditioning.Input("negative"),
io.Latent.Input("latent"),
],
outputs=[
io.Conditioning.Output(display_name="positive"),
io.Conditioning.Output(display_name="negative"),
io.Latent.Output(display_name="latent"),
],
)
RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
RETURN_NAMES = ("positive", "negative", "latent")
CATEGORY = "conditioning/video_models"
FUNCTION = "crop"
def __init__(self):
self._patchifier = SymmetricPatchifier(1)
def crop(self, positive, negative, latent):
@classmethod
def execute(cls, positive, negative, latent) -> io.NodeOutput:
latent_image = latent["samples"].clone()
noise_mask = get_noise_mask(latent)
_, num_keyframes = get_keyframe_idxs(positive)
if num_keyframes == 0:
return (positive, negative, {"samples": latent_image, "noise_mask": noise_mask},)
return io.NodeOutput(positive, negative, {"samples": latent_image, "noise_mask": noise_mask},)
latent_image = latent_image[:, :, :-num_keyframes]
noise_mask = noise_mask[:, :, :-num_keyframes]
@ -275,45 +302,54 @@ class LTXVCropGuides:
positive = node_helpers.conditioning_set_values(positive, {"keyframe_idxs": None})
negative = node_helpers.conditioning_set_values(negative, {"keyframe_idxs": None})
return (positive, negative, {"samples": latent_image, "noise_mask": noise_mask},)
return io.NodeOutput(positive, negative, {"samples": latent_image, "noise_mask": noise_mask})
crop = execute # TODO: remove
class LTXVConditioning:
class LTXVConditioning(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {"positive": ("CONDITIONING",),
"negative": ("CONDITIONING",),
"frame_rate": ("FLOAT", {"default": 25.0, "min": 0.0, "max": 1000.0, "step": 0.01}),
}}
def define_schema(cls):
return io.Schema(
node_id="LTXVConditioning",
category="conditioning/video_models",
inputs=[
io.Conditioning.Input("positive"),
io.Conditioning.Input("negative"),
io.Float.Input("frame_rate", default=25.0, min=0.0, max=1000.0, step=0.01),
],
outputs=[
io.Conditioning.Output(display_name="positive"),
io.Conditioning.Output(display_name="negative"),
],
)
RETURN_TYPES = ("CONDITIONING", "CONDITIONING")
RETURN_NAMES = ("positive", "negative")
FUNCTION = "append"
CATEGORY = "conditioning/video_models"
def append(self, positive, negative, frame_rate):
@classmethod
def execute(cls, positive, negative, frame_rate) -> io.NodeOutput:
positive = node_helpers.conditioning_set_values(positive, {"frame_rate": frame_rate})
negative = node_helpers.conditioning_set_values(negative, {"frame_rate": frame_rate})
return (positive, negative)
return io.NodeOutput(positive, negative)
class ModelSamplingLTXV:
class ModelSamplingLTXV(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {"model": ("MODEL",),
"max_shift": ("FLOAT", {"default": 2.05, "min": 0.0, "max": 100.0, "step": 0.01}),
"base_shift": ("FLOAT", {"default": 0.95, "min": 0.0, "max": 100.0, "step": 0.01}),
},
"optional": {"latent": ("LATENT",), }
}
def define_schema(cls):
return io.Schema(
node_id="ModelSamplingLTXV",
category="advanced/model",
inputs=[
io.Model.Input("model"),
io.Float.Input("max_shift", default=2.05, min=0.0, max=100.0, step=0.01),
io.Float.Input("base_shift", default=0.95, min=0.0, max=100.0, step=0.01),
io.Latent.Input("latent", optional=True),
],
outputs=[
io.Model.Output(),
],
)
RETURN_TYPES = ("MODEL",)
FUNCTION = "patch"
CATEGORY = "advanced/model"
def patch(self, model, max_shift, base_shift, latent=None):
@classmethod
def execute(cls, model, max_shift, base_shift, latent=None) -> io.NodeOutput:
m = model.clone()
if latent is None:
@ -337,37 +373,41 @@ class ModelSamplingLTXV:
model_sampling.set_parameters(shift=shift)
m.add_object_patch("model_sampling", model_sampling)
return (m,)
return io.NodeOutput(m)
class LTXVScheduler:
class LTXVScheduler(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required":
{"steps": ("INT", {"default": 20, "min": 1, "max": 10000}),
"max_shift": ("FLOAT", {"default": 2.05, "min": 0.0, "max": 100.0, "step": 0.01}),
"base_shift": ("FLOAT", {"default": 0.95, "min": 0.0, "max": 100.0, "step": 0.01}),
"stretch": ("BOOLEAN", {
"default": True,
"tooltip": "Stretch the sigmas to be in the range [terminal, 1]."
}),
"terminal": (
"FLOAT",
{
"default": 0.1, "min": 0.0, "max": 0.99, "step": 0.01,
"tooltip": "The terminal value of the sigmas after stretching."
},
def define_schema(cls):
return io.Schema(
node_id="LTXVScheduler",
category="sampling/custom_sampling/schedulers",
inputs=[
io.Int.Input("steps", default=20, min=1, max=10000),
io.Float.Input("max_shift", default=2.05, min=0.0, max=100.0, step= 0.01),
io.Float.Input("base_shift", default=0.95, min=0.0, max=100.0, step= 0.01),
io.Boolean.Input(
id="stretch",
default=True,
tooltip="Stretch the sigmas to be in the range [terminal, 1].",
),
},
"optional": {"latent": ("LATENT",), }
}
io.Float.Input(
id="terminal",
default=0.1,
min=0.0,
max=0.99,
step=0.01,
tooltip="The terminal value of the sigmas after stretching.",
),
io.Latent.Input("latent", optional=True),
],
outputs=[
io.Sigmas.Output(),
],
)
RETURN_TYPES = ("SIGMAS",)
CATEGORY = "sampling/custom_sampling/schedulers"
FUNCTION = "get_sigmas"
def get_sigmas(self, steps, max_shift, base_shift, stretch, terminal, latent=None):
@classmethod
def execute(cls, steps, max_shift, base_shift, stretch, terminal, latent=None) -> io.NodeOutput:
if latent is None:
tokens = 4096
else:
@ -397,7 +437,7 @@ class LTXVScheduler:
stretched = 1.0 - (one_minus_z / scale_factor)
sigmas[non_zero_mask] = stretched
return (sigmas,)
return io.NodeOutput(sigmas)
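For orientation, the stretch applied above rescales every nonzero sigma so the schedule spans [terminal, 1], as the tooltip states. A sketch of the mapping, under the assumption that scale_factor is chosen so the largest (1 − σ) lands exactly on (1 − terminal) (the hunk elides that line, so the exact expression is inferred, not quoted):

\[
\sigma' \;=\; 1 - \frac{1 - \sigma}{k},
\qquad
k \;=\; \frac{\max_i\,(1 - \sigma_i)}{1 - \text{terminal}}
\]

With this choice the smallest nonzero sigma maps to terminal, sigmas equal to 1 stay at 1, and the trailing zero sigma is left untouched by the non_zero_mask.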
def encode_single_frame(output_file, image_array: np.ndarray, crf):
@ -432,52 +472,55 @@ def preprocess(image: torch.Tensor, crf=29):
return image
image_array = (image[:(image.shape[0] // 2) * 2, :(image.shape[1] // 2) * 2] * 255.0).byte().cpu().numpy()
with io.BytesIO() as output_file:
with BytesIO() as output_file:
encode_single_frame(output_file, image_array, crf)
video_bytes = output_file.getvalue()
with io.BytesIO(video_bytes) as video_file:
with BytesIO(video_bytes) as video_file:
image_array = decode_single_frame(video_file)
tensor = torch.tensor(image_array, dtype=image.dtype, device=image.device) / 255.0
return tensor
class LTXVPreprocess:
class LTXVPreprocess(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"image": ("IMAGE",),
"img_compression": (
"INT",
{
"default": 35,
"min": 0,
"max": 100,
"tooltip": "Amount of compression to apply on image.",
},
def define_schema(cls):
return io.Schema(
node_id="LTXVPreprocess",
category="image",
inputs=[
io.Image.Input("image"),
io.Int.Input(
id="img_compression", default=35, min=0, max=100, tooltip="Amount of compression to apply on image."
),
}
}
],
outputs=[
io.Image.Output(display_name="output_image"),
],
)
FUNCTION = "preprocess"
RETURN_TYPES = ("IMAGE",)
RETURN_NAMES = ("output_image",)
CATEGORY = "image"
def preprocess(self, image, img_compression):
@classmethod
def execute(cls, image, img_compression) -> io.NodeOutput:
output_images = []
for i in range(image.shape[0]):
output_images.append(preprocess(image[i], img_compression))
return (torch.stack(output_images),)
return io.NodeOutput(torch.stack(output_images))
preprocess = execute # TODO: remove
class LtxvExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[io.ComfyNode]]:
return [
EmptyLTXVLatentVideo,
LTXVImgToVideo,
ModelSamplingLTXV,
LTXVConditioning,
LTXVScheduler,
LTXVAddGuide,
LTXVPreprocess,
LTXVCropGuides,
]
NODE_CLASS_MAPPINGS = {
"EmptyLTXVLatentVideo": EmptyLTXVLatentVideo,
"LTXVImgToVideo": LTXVImgToVideo,
"ModelSamplingLTXV": ModelSamplingLTXV,
"LTXVConditioning": LTXVConditioning,
"LTXVScheduler": LTXVScheduler,
"LTXVAddGuide": LTXVAddGuide,
"LTXVPreprocess": LTXVPreprocess,
"LTXVCropGuides": LTXVCropGuides,
}
async def comfy_entrypoint() -> LtxvExtension:
return LtxvExtension()
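For readers skimming these hunks, the migration pattern applied to every node in this diff is the same: the INPUT_TYPES/RETURN_TYPES/FUNCTION class attributes become a single define_schema classmethod returning io.Schema, the entry method becomes a classmethod execute returning io.NodeOutput, and the module-level NODE_CLASS_MAPPINGS dict is replaced by a ComfyExtension plus comfy_entrypoint. A minimal self-contained sketch of that pattern follows; the node and extension names (ExamplePassthrough, ExampleExtension) are hypothetical and only illustrate the structure, they are not part of this diff.

from typing_extensions import override
from comfy_api.latest import ComfyExtension, io


class ExamplePassthrough(io.ComfyNode):
    @classmethod
    def define_schema(cls):
        # Inputs, outputs, category and the experimental flag live in the schema,
        # replacing the old INPUT_TYPES/RETURN_TYPES/CATEGORY attributes.
        return io.Schema(
            node_id="ExamplePassthrough",
            category="_for_testing",
            inputs=[
                io.Image.Input("image"),
                io.Float.Input("strength", default=1.0, min=0.0, max=1.0, step=0.01),
            ],
            outputs=[
                io.Image.Output(),
            ],
            is_experimental=True,
        )

    @classmethod
    def execute(cls, image, strength) -> io.NodeOutput:
        # execute is a classmethod and returns io.NodeOutput instead of a tuple.
        return io.NodeOutput(image * strength)


class ExampleExtension(ComfyExtension):
    @override
    async def get_node_list(self) -> list[type[io.ComfyNode]]:
        # Replaces the old module-level NODE_CLASS_MAPPINGS dict.
        return [ExamplePassthrough]


async def comfy_entrypoint() -> ExampleExtension:
    return ExampleExtension()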

View File

@ -2,22 +2,28 @@ import torch
from comfy.comfy_types import IO
from comfy.nodes.package_typing import CustomNode
from typing_extensions import override
from comfy_api.latest import ComfyExtension, io
class RenormCFG(CustomNode):
class RenormCFG(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {"model": ("MODEL",),
"cfg_trunc": ("FLOAT", {"default": 100, "min": 0.0, "max": 100.0, "step": 0.01}),
"renorm_cfg": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step": 0.01}),
}}
def define_schema(cls):
return io.Schema(
node_id="RenormCFG",
category="advanced/model",
inputs=[
io.Model.Input("model"),
io.Float.Input("cfg_trunc", default=100, min=0.0, max=100.0, step=0.01),
io.Float.Input("renorm_cfg", default=1.0, min=0.0, max=100.0, step=0.01),
],
outputs=[
io.Model.Output(),
],
)
RETURN_TYPES = ("MODEL",)
FUNCTION = "patch"
CATEGORY = "advanced/model"
def patch(self, model, cfg_trunc, renorm_cfg):
@classmethod
def execute(cls, model, cfg_trunc, renorm_cfg) -> io.NodeOutput:
def renorm_cfg_func(args):
cond_denoised = args["cond_denoised"]
uncond_denoised = args["uncond_denoised"]
@ -56,10 +62,10 @@ class RenormCFG(CustomNode):
m = model.clone()
m.set_model_sampler_cfg_function(renorm_cfg_func)
return (m,)
return io.NodeOutput(m)
class CLIPTextEncodeLumina2(CustomNode):
class CLIPTextEncodeLumina2(io.ComfyNode):
SYSTEM_PROMPT = {
"superior": "You are an assistant designed to generate superior images with the superior " \
"degree of image-text alignment based on textual prompts or user prompts.",
@ -73,36 +79,52 @@ class CLIPTextEncodeLumina2(CustomNode):
"degree of image-text alignment based on textual prompts."
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"system_prompt": (list(CLIPTextEncodeLumina2.SYSTEM_PROMPT.keys()), {"tooltip": CLIPTextEncodeLumina2.SYSTEM_PROMPT_TIP}),
"user_prompt": (IO.STRING, {"multiline": True, "dynamicPrompts": True, "tooltip": "The text to be encoded."}),
"clip": (IO.CLIP, {"tooltip": "The CLIP model used for encoding the text."})
}
}
def define_schema(cls):
return io.Schema(
node_id="CLIPTextEncodeLumina2",
display_name="CLIP Text Encode for Lumina2",
category="conditioning",
description="Encodes a system prompt and a user prompt using a CLIP model into an embedding "
"that can be used to guide the diffusion model towards generating specific images.",
inputs=[
io.Combo.Input(
"system_prompt",
options=list(cls.SYSTEM_PROMPT.keys()),
tooltip=cls.SYSTEM_PROMPT_TIP,
),
io.String.Input(
"user_prompt",
multiline=True,
dynamic_prompts=True,
tooltip="The text to be encoded.",
),
io.Clip.Input("clip", tooltip="The CLIP model used for encoding the text."),
],
outputs=[
io.Conditioning.Output(
tooltip="A conditioning containing the embedded text used to guide the diffusion model.",
),
],
)
RETURN_TYPES = (IO.CONDITIONING,)
OUTPUT_TOOLTIPS = ("A conditioning containing the embedded text used to guide the diffusion model.",)
FUNCTION = "encode"
CATEGORY = "conditioning"
DESCRIPTION = "Encodes a system prompt and a user prompt using a CLIP model into an embedding that can be used to guide the diffusion model towards generating specific images."
def encode(self, clip, user_prompt, system_prompt):
@classmethod
def execute(cls, clip, user_prompt, system_prompt) -> io.NodeOutput:
if clip is None:
raise RuntimeError("ERROR: clip input is invalid: None\n\nIf the clip is from a checkpoint loader node your checkpoint does not contain a valid clip or text encoder model.")
system_prompt = CLIPTextEncodeLumina2.SYSTEM_PROMPT[system_prompt]
system_prompt = cls.SYSTEM_PROMPT[system_prompt]
prompt = f'{system_prompt} <Prompt Start> {user_prompt}'
tokens = clip.tokenize(prompt)
return (clip.encode_from_tokens_scheduled(tokens),)
return io.NodeOutput(clip.encode_from_tokens_scheduled(tokens))
NODE_CLASS_MAPPINGS = {
"CLIPTextEncodeLumina2": CLIPTextEncodeLumina2,
"RenormCFG": RenormCFG
}
class Lumina2Extension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[io.ComfyNode]]:
return [
CLIPTextEncodeLumina2,
RenormCFG,
]
NODE_DISPLAY_NAME_MAPPINGS = {
"CLIPTextEncodeLumina2": "CLIP Text Encode for Lumina2",
}
async def comfy_entrypoint() -> Lumina2Extension:
return Lumina2Extension()

View File

@ -1,20 +1,29 @@
from typing_extensions import override
import torch
import torch.nn.functional as F
from comfy_api.latest import ComfyExtension, io
class Mahiro:
class Mahiro(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {"model": ("MODEL",),
}}
def define_schema(cls):
return io.Schema(
node_id="Mahiro",
display_name="Mahiro is so cute that she deserves a better guidance function!! (。・ω・。)",
category="_for_testing",
description="Modify the guidance to scale more on the 'direction' of the positive prompt rather than the difference between the negative prompt.",
inputs=[
io.Model.Input("model"),
],
outputs=[
io.Model.Output(display_name="patched_model"),
],
is_experimental=True,
)
RETURN_TYPES = ("MODEL",)
RETURN_NAMES = ("patched_model",)
FUNCTION = "patch"
CATEGORY = "_for_testing"
DESCRIPTION = "Modify the guidance to scale more on the 'direction' of the positive prompt rather than the difference between the negative prompt."
def patch(self, model):
@classmethod
def execute(cls, model) -> io.NodeOutput:
m = model.clone()
def mahiro_normd(args):
@ -35,13 +44,16 @@ class Mahiro:
return wm
m.set_model_sampler_post_cfg_function(mahiro_normd)
return (m,)
return io.NodeOutput(m)
NODE_CLASS_MAPPINGS = {
"Mahiro": Mahiro
}
class MahiroExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[io.ComfyNode]]:
return [
Mahiro,
]
NODE_DISPLAY_NAME_MAPPINGS = {
"Mahiro": "Mahiro is so cute that she deserves a better guidance function!! (。・ω・。)",
}
async def comfy_entrypoint() -> MahiroExtension:
return MahiroExtension()

View File

@ -1,24 +1,35 @@
from comfy import utils
from typing_extensions import override
import comfy.utils
from comfy_api.latest import ComfyExtension, io
class PatchModelAddDownscale(io.ComfyNode):
UPSCALE_METHODS = ["bicubic", "nearest-exact", "bilinear", "area", "bislerp"]
class PatchModelAddDownscale:
upscale_methods = ["bicubic", "nearest-exact", "bilinear", "area", "bislerp"]
@classmethod
def INPUT_TYPES(s):
return {"required": { "model": ("MODEL",),
"block_number": ("INT", {"default": 3, "min": 1, "max": 32, "step": 1}),
"downscale_factor": ("FLOAT", {"default": 2.0, "min": 0.1, "max": 9.0, "step": 0.001}),
"start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001}),
"end_percent": ("FLOAT", {"default": 0.35, "min": 0.0, "max": 1.0, "step": 0.001}),
"downscale_after_skip": ("BOOLEAN", {"default": True}),
"downscale_method": (s.upscale_methods,),
"upscale_method": (s.upscale_methods,),
}}
RETURN_TYPES = ("MODEL",)
FUNCTION = "patch"
def define_schema(cls):
return io.Schema(
node_id="PatchModelAddDownscale",
display_name="PatchModelAddDownscale (Kohya Deep Shrink)",
category="model_patches/unet",
inputs=[
io.Model.Input("model"),
io.Int.Input("block_number", default=3, min=1, max=32, step=1),
io.Float.Input("downscale_factor", default=2.0, min=0.1, max=9.0, step=0.001),
io.Float.Input("start_percent", default=0.0, min=0.0, max=1.0, step=0.001),
io.Float.Input("end_percent", default=0.35, min=0.0, max=1.0, step=0.001),
io.Boolean.Input("downscale_after_skip", default=True),
io.Combo.Input("downscale_method", options=cls.UPSCALE_METHODS),
io.Combo.Input("upscale_method", options=cls.UPSCALE_METHODS),
],
outputs=[
io.Model.Output(),
],
)
CATEGORY = "model_patches/unet"
def patch(self, model, block_number, downscale_factor, start_percent, end_percent, downscale_after_skip, downscale_method, upscale_method):
@classmethod
def execute(cls, model, block_number, downscale_factor, start_percent, end_percent, downscale_after_skip, downscale_method, upscale_method) -> io.NodeOutput:
model_sampling = model.get_model_object("model_sampling")
sigma_start = model_sampling.percent_to_sigma(start_percent)
sigma_end = model_sampling.percent_to_sigma(end_percent)
@ -41,13 +52,22 @@ class PatchModelAddDownscale:
else:
m.set_model_input_block_patch(input_block_patch)
m.set_model_output_block_patch(output_block_patch)
return (m, )
return io.NodeOutput(m)
NODE_CLASS_MAPPINGS = {
"PatchModelAddDownscale": PatchModelAddDownscale,
}
NODE_DISPLAY_NAME_MAPPINGS = {
# Sampling
"PatchModelAddDownscale": "PatchModelAddDownscale (Kohya Deep Shrink)",
"PatchModelAddDownscale": "",
}
class ModelDownscaleExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[io.ComfyNode]]:
return [
PatchModelAddDownscale,
]
async def comfy_entrypoint() -> ModelDownscaleExtension:
return ModelDownscaleExtension()

View File

@ -1,24 +1,34 @@
import torch
import comfy.model_management
from typing_extensions import override
from comfy_api.latest import ComfyExtension, io
from kornia.morphology import dilation, erosion, opening, closing, gradient, top_hat, bottom_hat
import kornia.color
class Morphology:
class Morphology(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {"image": ("IMAGE",),
"operation": (["erode", "dilate", "open", "close", "gradient", "bottom_hat", "top_hat"],),
"kernel_size": ("INT", {"default": 3, "min": 3, "max": 999, "step": 1}),
}}
def define_schema(cls):
return io.Schema(
node_id="Morphology",
display_name="ImageMorphology",
category="image/postprocessing",
inputs=[
io.Image.Input("image"),
io.Combo.Input(
"operation",
options=["erode", "dilate", "open", "close", "gradient", "bottom_hat", "top_hat"],
),
io.Int.Input("kernel_size", default=3, min=3, max=999, step=1),
],
outputs=[
io.Image.Output(),
],
)
RETURN_TYPES = ("IMAGE",)
FUNCTION = "process"
CATEGORY = "image/postprocessing"
def process(self, image, operation, kernel_size):
@classmethod
def execute(cls, image, operation, kernel_size) -> io.NodeOutput:
device = comfy.model_management.get_torch_device()
kernel = torch.ones(kernel_size, kernel_size, device=device)
image_k = image.to(device).movedim(-1, 1)
@ -39,49 +49,63 @@ class Morphology:
else:
raise ValueError(f"Invalid operation {operation} for morphology. Must be one of 'erode', 'dilate', 'open', 'close', 'gradient', 'tophat', 'bottomhat'")
img_out = output.to(comfy.model_management.intermediate_device()).movedim(1, -1)
return (img_out,)
return io.NodeOutput(img_out)
class ImageRGBToYUV:
class ImageRGBToYUV(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": { "image": ("IMAGE",),
}}
def define_schema(cls):
return io.Schema(
node_id="ImageRGBToYUV",
category="image/batch",
inputs=[
io.Image.Input("image"),
],
outputs=[
io.Image.Output(display_name="Y"),
io.Image.Output(display_name="U"),
io.Image.Output(display_name="V"),
],
)
RETURN_TYPES = ("IMAGE", "IMAGE", "IMAGE")
RETURN_NAMES = ("Y", "U", "V")
FUNCTION = "execute"
CATEGORY = "image/batch"
def execute(self, image):
@classmethod
def execute(cls, image) -> io.NodeOutput:
out = kornia.color.rgb_to_ycbcr(image.movedim(-1, 1)).movedim(1, -1)
return (out[..., 0:1].expand_as(image), out[..., 1:2].expand_as(image), out[..., 2:3].expand_as(image))
return io.NodeOutput(out[..., 0:1].expand_as(image), out[..., 1:2].expand_as(image), out[..., 2:3].expand_as(image))
class ImageYUVToRGB:
class ImageYUVToRGB(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {"Y": ("IMAGE",),
"U": ("IMAGE",),
"V": ("IMAGE",),
}}
def define_schema(cls):
return io.Schema(
node_id="ImageYUVToRGB",
category="image/batch",
inputs=[
io.Image.Input("Y"),
io.Image.Input("U"),
io.Image.Input("V"),
],
outputs=[
io.Image.Output(),
],
)
RETURN_TYPES = ("IMAGE",)
FUNCTION = "execute"
CATEGORY = "image/batch"
def execute(self, Y, U, V):
@classmethod
def execute(cls, Y, U, V) -> io.NodeOutput:
image = torch.cat([torch.mean(Y, dim=-1, keepdim=True), torch.mean(U, dim=-1, keepdim=True), torch.mean(V, dim=-1, keepdim=True)], dim=-1)
out = kornia.color.ycbcr_to_rgb(image.movedim(-1, 1)).movedim(1, -1)
return (out,)
return io.NodeOutput(out)
NODE_CLASS_MAPPINGS = {
"Morphology": Morphology,
"ImageRGBToYUV": ImageRGBToYUV,
"ImageYUVToRGB": ImageYUVToRGB,
}
NODE_DISPLAY_NAME_MAPPINGS = {
"Morphology": "ImageMorphology",
}
class MorphologyExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[io.ComfyNode]]:
return [
Morphology,
ImageRGBToYUV,
ImageYUVToRGB,
]
async def comfy_entrypoint() -> MorphologyExtension:
return MorphologyExtension()

View File

@ -1,9 +1,12 @@
# from https://github.com/bebebe666/OptimalSteps
import numpy as np
import torch
from typing_extensions import override
from comfy_api.latest import ComfyExtension, io
def loglinear_interp(t_steps, num_steps):
"""
Performs log-linear interpolation of a given array of decreasing numbers.
@ -23,25 +26,28 @@ NOISE_LEVELS = {"FLUX": [0.9968, 0.9886, 0.9819, 0.975, 0.966, 0.9471, 0.9158, 0
"Chroma": [0.992, 0.99, 0.988, 0.985, 0.982, 0.978, 0.973, 0.968, 0.961, 0.953, 0.943, 0.931, 0.917, 0.9, 0.881, 0.858, 0.832, 0.802, 0.769, 0.731, 0.69, 0.646, 0.599, 0.55, 0.501, 0.451, 0.402, 0.355, 0.311, 0.27, 0.232, 0.199, 0.169, 0.143, 0.12, 0.101, 0.084, 0.07, 0.058, 0.048, 0.001],
}
class OptimalStepsScheduler:
class OptimalStepsScheduler(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required":
{"model_type": (["FLUX", "Wan", "Chroma"], ),
"steps": ("INT", {"default": 20, "min": 3, "max": 1000}),
"denoise": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}),
}
}
RETURN_TYPES = ("SIGMAS",)
CATEGORY = "sampling/custom_sampling/schedulers"
def define_schema(cls):
return io.Schema(
node_id="OptimalStepsScheduler",
category="sampling/custom_sampling/schedulers",
inputs=[
io.Combo.Input("model_type", options=["FLUX", "Wan", "Chroma"]),
io.Int.Input("steps", default=20, min=3, max=1000),
io.Float.Input("denoise", default=1.0, min=0.0, max=1.0, step=0.01),
],
outputs=[
io.Sigmas.Output(),
],
)
FUNCTION = "get_sigmas"
def get_sigmas(self, model_type, steps, denoise):
@classmethod
def execute(cls, model_type, steps, denoise) -> io.NodeOutput:
total_steps = steps
if denoise < 1.0:
if denoise <= 0.0:
return (torch.FloatTensor([]),)
return io.NodeOutput(torch.FloatTensor([]))
total_steps = round(steps * denoise)
sigmas = NOISE_LEVELS[model_type][:]
@ -50,8 +56,16 @@ class OptimalStepsScheduler:
sigmas = sigmas[-(total_steps + 1):]
sigmas[-1] = 0
return (torch.FloatTensor(sigmas), )
return io.NodeOutput(torch.FloatTensor(sigmas))
NODE_CLASS_MAPPINGS = {
"OptimalStepsScheduler": OptimalStepsScheduler,
}
class OptimalStepsExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[io.ComfyNode]]:
return [
OptimalStepsScheduler,
]
async def comfy_entrypoint() -> OptimalStepsExtension:
return OptimalStepsExtension()

View File

@ -3,25 +3,30 @@
#My modified one here is more basic but has less chances of breaking with ComfyUI updates.
from typing_extensions import override
import comfy.model_patcher
import comfy.samplers
from comfy_api.latest import ComfyExtension, io
class PerturbedAttentionGuidance:
class PerturbedAttentionGuidance(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"model": ("MODEL",),
"scale": ("FLOAT", {"default": 3.0, "min": 0.0, "max": 100.0, "step": 0.01, "round": 0.01}),
}
}
def define_schema(cls):
return io.Schema(
node_id="PerturbedAttentionGuidance",
category="model_patches/unet",
inputs=[
io.Model.Input("model"),
io.Float.Input("scale", default=3.0, min=0.0, max=100.0, step=0.01, round=0.01),
],
outputs=[
io.Model.Output(),
],
)
RETURN_TYPES = ("MODEL",)
FUNCTION = "patch"
CATEGORY = "model_patches/unet"
def patch(self, model, scale):
@classmethod
def execute(cls, model, scale) -> io.NodeOutput:
unet_block = "middle"
unet_block_id = 0
m = model.clone()
@ -49,8 +54,16 @@ class PerturbedAttentionGuidance:
m.set_model_sampler_post_cfg_function(post_cfg_function)
return (m,)
return io.NodeOutput(m)
NODE_CLASS_MAPPINGS = {
"PerturbedAttentionGuidance": PerturbedAttentionGuidance,
}
class PAGExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[io.ComfyNode]]:
return [
PerturbedAttentionGuidance,
]
async def comfy_entrypoint() -> PAGExtension:
return PAGExtension()

View File

@ -3,6 +3,9 @@ from comfy import samplers
from comfy import sampler_helpers
from comfy import node_helpers
import math
from typing_extensions import override
from comfy_api.latest import ComfyExtension, io
def perp_neg(x, noise_pred_pos, noise_pred_neg, noise_pred_nocond, neg_scale, cond_scale):
pos = noise_pred_pos - noise_pred_nocond
@ -14,20 +17,27 @@ def perp_neg(x, noise_pred_pos, noise_pred_neg, noise_pred_nocond, neg_scale, co
return cfg_result
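For orientation, perp_neg above implements perpendicular negative guidance. In the usual formulation of that technique (stated generally here, since the hunk elides the function body; s is cond_scale, λ is neg_scale, and ε_∅ is the prediction for empty_conditioning):

\[
d_{+} = \epsilon_{+} - \epsilon_{\varnothing},
\qquad
d_{-} = \epsilon_{-} - \epsilon_{\varnothing},
\qquad
d_{-}^{\perp} = d_{-} - \frac{\langle d_{-},\, d_{+} \rangle}{\lVert d_{+} \rVert^{2}}\, d_{+}
\]
\[
\epsilon_{\text{guided}} = \epsilon_{\varnothing} + s\,\bigl(d_{+} - \lambda\, d_{-}^{\perp}\bigr)
\]

Only the component of the negative direction orthogonal to the positive direction is subtracted, so the negative prompt steers away from unwanted content without cancelling the positive prompt.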
#TODO: This node should be removed; it has been replaced with PerpNegGuider
class PerpNeg:
class PerpNeg(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required": {"model": ("MODEL", ),
"empty_conditioning": ("CONDITIONING", ),
"neg_scale": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step": 0.01}),
}}
RETURN_TYPES = ("MODEL",)
FUNCTION = "patch"
def define_schema(cls):
return io.Schema(
node_id="PerpNeg",
display_name="Perp-Neg (DEPRECATED by PerpNegGuider)",
category="_for_testing",
inputs=[
io.Model.Input("model"),
io.Conditioning.Input("empty_conditioning"),
io.Float.Input("neg_scale", default=1.0, min=0.0, max=100.0, step=0.01),
],
outputs=[
io.Model.Output(),
],
is_experimental=True,
is_deprecated=True,
)
CATEGORY = "_for_testing"
DEPRECATED = True
def patch(self, model, empty_conditioning, neg_scale):
@classmethod
def execute(cls, model, empty_conditioning, neg_scale) -> io.NodeOutput:
m = model.clone()
nocond = sampler_helpers.convert_cond(empty_conditioning)
@ -48,7 +58,7 @@ class PerpNeg:
m.set_model_sampler_cfg_function(cfg_function)
return (m, )
return io.NodeOutput(m)
class Guider_PerpNeg(samplers.CFGGuider):
@ -111,35 +121,42 @@ class Guider_PerpNeg(samplers.CFGGuider):
return cfg_result
class PerpNegGuider:
class PerpNegGuider(io.ComfyNode):
@classmethod
def INPUT_TYPES(s):
return {"required":
{"model": ("MODEL",),
"positive": ("CONDITIONING", ),
"negative": ("CONDITIONING", ),
"empty_conditioning": ("CONDITIONING", ),
"cfg": ("FLOAT", {"default": 8.0, "min": 0.0, "max": 100.0, "step":0.1, "round": 0.01}),
"neg_scale": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step": 0.01}),
}
}
def define_schema(cls):
return io.Schema(
node_id="PerpNegGuider",
category="_for_testing",
inputs=[
io.Model.Input("model"),
io.Conditioning.Input("positive"),
io.Conditioning.Input("negative"),
io.Conditioning.Input("empty_conditioning"),
io.Float.Input("cfg", default=8.0, min=0.0, max=100.0, step=0.1, round=0.01),
io.Float.Input("neg_scale", default=1.0, min=0.0, max=100.0, step=0.01),
],
outputs=[
io.Guider.Output(),
],
is_experimental=True,
)
RETURN_TYPES = ("GUIDER",)
FUNCTION = "get_guider"
CATEGORY = "_for_testing"
def get_guider(self, model, positive, negative, empty_conditioning, cfg, neg_scale):
@classmethod
def execute(cls, model, positive, negative, empty_conditioning, cfg, neg_scale) -> io.NodeOutput:
guider = Guider_PerpNeg(model)
guider.set_conds(positive, negative, empty_conditioning)
guider.set_cfg(cfg, neg_scale)
return (guider,)
return io.NodeOutput(guider)
NODE_CLASS_MAPPINGS = {
"PerpNeg": PerpNeg,
"PerpNegGuider": PerpNegGuider,
}
NODE_DISPLAY_NAME_MAPPINGS = {
"PerpNeg": "Perp-Neg (DEPRECATED by PerpNegGuider)",
}
class PerpNegExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[io.ComfyNode]]:
return [
PerpNeg,
PerpNegGuider,
]
async def comfy_entrypoint() -> PerpNegExtension:
return PerpNegExtension()

Some files were not shown because too many files have changed in this diff.