diff --git a/comfy/ldm/ace/vae/music_dcae_pipeline.py b/comfy/ldm/ace/vae/music_dcae_pipeline.py index af81280eb..3c8830c17 100644 --- a/comfy/ldm/ace/vae/music_dcae_pipeline.py +++ b/comfy/ldm/ace/vae/music_dcae_pipeline.py @@ -23,8 +23,6 @@ class MusicDCAE(torch.nn.Module): else: self.source_sample_rate = source_sample_rate - # self.resampler = torchaudio.transforms.Resample(source_sample_rate, 44100) - self.transform = transforms.Compose([ transforms.Normalize(0.5, 0.5), ]) @@ -37,10 +35,6 @@ class MusicDCAE(torch.nn.Module): self.scale_factor = 0.1786 self.shift_factor = -1.9091 - def load_audio(self, audio_path): - audio, sr = torchaudio.load(audio_path) - return audio, sr - def forward_mel(self, audios): mels = [] for i in range(len(audios)): @@ -73,10 +67,8 @@ class MusicDCAE(torch.nn.Module): latent = self.dcae.encoder(mel.unsqueeze(0)) latents.append(latent) latents = torch.cat(latents, dim=0) - # latent_lengths = (audio_lengths / sr * 44100 / 512 / self.time_dimention_multiple).long() latents = (latents - self.shift_factor) * self.scale_factor return latents - # return latents, latent_lengths @torch.no_grad() def decode(self, latents, audio_lengths=None, sr=None): @@ -91,9 +83,7 @@ class MusicDCAE(torch.nn.Module): wav = self.vocoder.decode(mels[0]).squeeze(1) if sr is not None: - # resampler = torchaudio.transforms.Resample(44100, sr).to(latents.device).to(latents.dtype) wav = torchaudio.functional.resample(wav, 44100, sr) - # wav = resampler(wav) else: sr = 44100 pred_wavs.append(wav) @@ -101,7 +91,6 @@ class MusicDCAE(torch.nn.Module): if audio_lengths is not None: pred_wavs = [wav[:, :length].cpu() for wav, length in zip(pred_wavs, audio_lengths)] return torch.stack(pred_wavs) - # return sr, pred_wavs def forward(self, audios, audio_lengths=None, sr=None): latents, latent_lengths = self.encode(audios=audios, audio_lengths=audio_lengths, sr=sr) diff --git a/comfy/model_detection.py b/comfy/model_detection.py index 46415c17a..7677617c0 100644 --- a/comfy/model_detection.py +++ b/comfy/model_detection.py @@ -365,8 +365,8 @@ def detect_unet_config(state_dict, key_prefix, metadata=None): dit_config["patch_size"] = 2 dit_config["in_channels"] = 16 dit_config["dim"] = 2304 - dit_config["cap_feat_dim"] = 2304 - dit_config["n_layers"] = 26 + dit_config["cap_feat_dim"] = state_dict['{}cap_embedder.1.weight'.format(key_prefix)].shape[1] + dit_config["n_layers"] = count_blocks(state_dict_keys, '{}layers.'.format(key_prefix) + '{}.') dit_config["n_heads"] = 24 dit_config["n_kv_heads"] = 8 dit_config["qk_norm"] = True diff --git a/comfy/sd.py b/comfy/sd.py index be225ad03..f2d95f85a 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -890,6 +890,7 @@ class TEModel(Enum): QWEN25_3B = 10 QWEN25_7B = 11 BYT5_SMALL_GLYPH = 12 + GEMMA_3_4B = 13 def detect_te_model(sd): if "text_model.encoder.layers.30.mlp.fc1.weight" in sd: @@ -912,6 +913,8 @@ def detect_te_model(sd): return TEModel.BYT5_SMALL_GLYPH return TEModel.T5_BASE if 'model.layers.0.post_feedforward_layernorm.weight' in sd: + if 'model.layers.0.self_attn.q_norm.weight' in sd: + return TEModel.GEMMA_3_4B return TEModel.GEMMA_2_2B if 'model.layers.0.self_attn.k_proj.bias' in sd: weight = sd['model.layers.0.self_attn.k_proj.bias'] @@ -1016,6 +1019,10 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip clip_target.clip = comfy.text_encoders.lumina2.te(**llama_detect(clip_data)) clip_target.tokenizer = comfy.text_encoders.lumina2.LuminaTokenizer tokenizer_data["spiece_model"] = clip_data[0].get("spiece_model", None) 
+ elif te_model == TEModel.GEMMA_3_4B: + clip_target.clip = comfy.text_encoders.lumina2.te(**llama_detect(clip_data), model_type="gemma3_4b") + clip_target.tokenizer = comfy.text_encoders.lumina2.NTokenizer + tokenizer_data["spiece_model"] = clip_data[0].get("spiece_model", None) elif te_model == TEModel.LLAMA3_8: clip_target.clip = comfy.text_encoders.hidream.hidream_clip(**llama_detect(clip_data), clip_l=False, clip_g=False, t5=False, llama=True, dtype_t5=None, t5xxl_scaled_fp8=None) diff --git a/comfy/text_encoders/llama.py b/comfy/text_encoders/llama.py index c5a48ba9f..c050759fe 100644 --- a/comfy/text_encoders/llama.py +++ b/comfy/text_encoders/llama.py @@ -3,6 +3,7 @@ import torch.nn as nn from dataclasses import dataclass from typing import Optional, Any import math +import logging from comfy.ldm.modules.attention import optimized_attention_for_device import comfy.model_management @@ -28,6 +29,9 @@ class Llama2Config: mlp_activation = "silu" qkv_bias = False rope_dims = None + q_norm = None + k_norm = None + rope_scale = None @dataclass class Qwen25_3BConfig: @@ -46,6 +50,9 @@ class Qwen25_3BConfig: mlp_activation = "silu" qkv_bias = True rope_dims = None + q_norm = None + k_norm = None + rope_scale = None @dataclass class Qwen25_7BVLI_Config: @@ -64,6 +71,9 @@ class Qwen25_7BVLI_Config: mlp_activation = "silu" qkv_bias = True rope_dims = [16, 24, 24] + q_norm = None + k_norm = None + rope_scale = None @dataclass class Gemma2_2B_Config: @@ -82,6 +92,32 @@ class Gemma2_2B_Config: mlp_activation = "gelu_pytorch_tanh" qkv_bias = False rope_dims = None + q_norm = None + k_norm = None + sliding_attention = None + rope_scale = None + +@dataclass +class Gemma3_4B_Config: + vocab_size: int = 262208 + hidden_size: int = 2560 + intermediate_size: int = 10240 + num_hidden_layers: int = 34 + num_attention_heads: int = 8 + num_key_value_heads: int = 4 + max_position_embeddings: int = 131072 + rms_norm_eps: float = 1e-6 + rope_theta = [10000.0, 1000000.0] + transformer_type: str = "gemma3" + head_dim = 256 + rms_norm_add = True + mlp_activation = "gelu_pytorch_tanh" + qkv_bias = False + rope_dims = None + q_norm = "gemma3" + k_norm = "gemma3" + sliding_attention = [False, False, False, False, False, 1024] + rope_scale = [1.0, 8.0] class RMSNorm(nn.Module): def __init__(self, dim: int, eps: float = 1e-5, add=False, device=None, dtype=None): @@ -106,25 +142,40 @@ def rotate_half(x): return torch.cat((-x2, x1), dim=-1) -def precompute_freqs_cis(head_dim, position_ids, theta, rope_dims=None, device=None): - theta_numerator = torch.arange(0, head_dim, 2, device=device).float() - inv_freq = 1.0 / (theta ** (theta_numerator / head_dim)) +def precompute_freqs_cis(head_dim, position_ids, theta, rope_scale=None, rope_dims=None, device=None): + if not isinstance(theta, list): + theta = [theta] - inv_freq_expanded = inv_freq[None, :, None].float().expand(position_ids.shape[0], -1, 1) - position_ids_expanded = position_ids[:, None, :].float() - freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2) - emb = torch.cat((freqs, freqs), dim=-1) - cos = emb.cos() - sin = emb.sin() - if rope_dims is not None and position_ids.shape[0] > 1: - mrope_section = rope_dims * 2 - cos = torch.cat([m[i % 3] for i, m in enumerate(cos.split(mrope_section, dim=-1))], dim=-1).unsqueeze(0) - sin = torch.cat([m[i % 3] for i, m in enumerate(sin.split(mrope_section, dim=-1))], dim=-1).unsqueeze(0) - else: - cos = cos.unsqueeze(1) - sin = sin.unsqueeze(1) + out = [] + for index, t in enumerate(theta): + 
theta_numerator = torch.arange(0, head_dim, 2, device=device).float() + inv_freq = 1.0 / (t ** (theta_numerator / head_dim)) - return (cos, sin) + if rope_scale is not None: + if isinstance(rope_scale, list): + inv_freq /= rope_scale[index] + else: + inv_freq /= rope_scale + + inv_freq_expanded = inv_freq[None, :, None].float().expand(position_ids.shape[0], -1, 1) + position_ids_expanded = position_ids[:, None, :].float() + freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2) + emb = torch.cat((freqs, freqs), dim=-1) + cos = emb.cos() + sin = emb.sin() + if rope_dims is not None and position_ids.shape[0] > 1: + mrope_section = rope_dims * 2 + cos = torch.cat([m[i % 3] for i, m in enumerate(cos.split(mrope_section, dim=-1))], dim=-1).unsqueeze(0) + sin = torch.cat([m[i % 3] for i, m in enumerate(sin.split(mrope_section, dim=-1))], dim=-1).unsqueeze(0) + else: + cos = cos.unsqueeze(1) + sin = sin.unsqueeze(1) + out.append((cos, sin)) + + if len(out) == 1: + return out[0] + + return out def apply_rope(xq, xk, freqs_cis): @@ -152,6 +203,14 @@ class Attention(nn.Module): self.v_proj = ops.Linear(config.hidden_size, self.num_kv_heads * self.head_dim, bias=config.qkv_bias, device=device, dtype=dtype) self.o_proj = ops.Linear(self.inner_size, config.hidden_size, bias=False, device=device, dtype=dtype) + self.q_norm = None + self.k_norm = None + + if config.q_norm == "gemma3": + self.q_norm = RMSNorm(self.head_dim, eps=config.rms_norm_eps, add=config.rms_norm_add, device=device, dtype=dtype) + if config.k_norm == "gemma3": + self.k_norm = RMSNorm(self.head_dim, eps=config.rms_norm_eps, add=config.rms_norm_add, device=device, dtype=dtype) + def forward( self, hidden_states: torch.Tensor, @@ -168,6 +227,11 @@ class Attention(nn.Module): xk = xk.view(batch_size, seq_length, self.num_kv_heads, self.head_dim).transpose(1, 2) xv = xv.view(batch_size, seq_length, self.num_kv_heads, self.head_dim).transpose(1, 2) + if self.q_norm is not None: + xq = self.q_norm(xq) + if self.k_norm is not None: + xk = self.k_norm(xk) + xq, xk = apply_rope(xq, xk, freqs_cis=freqs_cis) xk = xk.repeat_interleave(self.num_heads // self.num_kv_heads, dim=1) @@ -192,7 +256,7 @@ class MLP(nn.Module): return self.down_proj(self.activation(self.gate_proj(x)) * self.up_proj(x)) class TransformerBlock(nn.Module): - def __init__(self, config: Llama2Config, device=None, dtype=None, ops: Any = None): + def __init__(self, config: Llama2Config, index, device=None, dtype=None, ops: Any = None): super().__init__() self.self_attn = Attention(config, device=device, dtype=dtype, ops=ops) self.mlp = MLP(config, device=device, dtype=dtype, ops=ops) @@ -226,7 +290,7 @@ class TransformerBlock(nn.Module): return x class TransformerBlockGemma2(nn.Module): - def __init__(self, config: Llama2Config, device=None, dtype=None, ops: Any = None): + def __init__(self, config: Llama2Config, index, device=None, dtype=None, ops: Any = None): super().__init__() self.self_attn = Attention(config, device=device, dtype=dtype, ops=ops) self.mlp = MLP(config, device=device, dtype=dtype, ops=ops) @@ -235,6 +299,13 @@ class TransformerBlockGemma2(nn.Module): self.pre_feedforward_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps, add=config.rms_norm_add, device=device, dtype=dtype) self.post_feedforward_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps, add=config.rms_norm_add, device=device, dtype=dtype) + if config.sliding_attention is not None: # TODO: implement. 
(Not that necessary since models are trained on less than 1024 tokens) + self.sliding_attention = config.sliding_attention[index % len(config.sliding_attention)] + else: + self.sliding_attention = False + + self.transformer_type = config.transformer_type + def forward( self, x: torch.Tensor, @@ -242,6 +313,14 @@ class TransformerBlockGemma2(nn.Module): freqs_cis: Optional[torch.Tensor] = None, optimized_attention=None, ): + if self.transformer_type == 'gemma3': + if self.sliding_attention: + if x.shape[1] > self.sliding_attention: + logging.warning("Warning: sliding attention not implemented, results may be incorrect") + freqs_cis = freqs_cis[1] + else: + freqs_cis = freqs_cis[0] + # Self Attention residual = x x = self.input_layernorm(x) @@ -276,7 +355,7 @@ class Llama2_(nn.Module): device=device, dtype=dtype ) - if self.config.transformer_type == "gemma2": + if self.config.transformer_type == "gemma2" or self.config.transformer_type == "gemma3": transformer = TransformerBlockGemma2 self.normalize_in = True else: @@ -284,8 +363,8 @@ class Llama2_(nn.Module): self.normalize_in = False self.layers = nn.ModuleList([ - transformer(config, device=device, dtype=dtype, ops=ops) - for _ in range(config.num_hidden_layers) + transformer(config, index=i, device=device, dtype=dtype, ops=ops) + for i in range(config.num_hidden_layers) ]) self.norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps, add=config.rms_norm_add, device=device, dtype=dtype) # self.lm_head = ops.Linear(config.hidden_size, config.vocab_size, bias=False, device=device, dtype=dtype) @@ -305,6 +384,7 @@ class Llama2_(nn.Module): freqs_cis = precompute_freqs_cis(self.config.head_dim, position_ids, self.config.rope_theta, + self.config.rope_scale, self.config.rope_dims, device=x.device) @@ -433,3 +513,12 @@ class Gemma2_2B(BaseLlama, torch.nn.Module): self.model = Llama2_(config, device=device, dtype=dtype, ops=operations) self.dtype = dtype + +class Gemma3_4B(BaseLlama, torch.nn.Module): + def __init__(self, config_dict, dtype, device, operations): + super().__init__() + config = Gemma3_4B_Config(**config_dict) + self.num_layers = config.num_hidden_layers + + self.model = Llama2_(config, device=device, dtype=dtype, ops=operations) + self.dtype = dtype diff --git a/comfy/text_encoders/lumina2.py b/comfy/text_encoders/lumina2.py index 674461b75..fd986e2c1 100644 --- a/comfy/text_encoders/lumina2.py +++ b/comfy/text_encoders/lumina2.py @@ -11,23 +11,41 @@ class Gemma2BTokenizer(sd1_clip.SDTokenizer): def state_dict(self): return {"spiece_model": self.tokenizer.serialize_model()} +class Gemma3_4BTokenizer(sd1_clip.SDTokenizer): + def __init__(self, embedding_directory=None, tokenizer_data={}): + tokenizer = tokenizer_data.get("spiece_model", None) + super().__init__(tokenizer, pad_with_end=False, embedding_size=2560, embedding_key='gemma3_4b', tokenizer_class=SPieceTokenizer, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=1, tokenizer_args={"add_bos": True, "add_eos": False}, tokenizer_data=tokenizer_data) + + def state_dict(self): + return {"spiece_model": self.tokenizer.serialize_model()} class LuminaTokenizer(sd1_clip.SD1Tokenizer): def __init__(self, embedding_directory=None, tokenizer_data={}): super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, name="gemma2_2b", tokenizer=Gemma2BTokenizer) +class NTokenizer(sd1_clip.SD1Tokenizer): + def __init__(self, embedding_directory=None, tokenizer_data={}): + super().__init__(embedding_directory=embedding_directory, 
tokenizer_data=tokenizer_data, name="gemma3_4b", tokenizer=Gemma3_4BTokenizer) class Gemma2_2BModel(sd1_clip.SDClipModel): def __init__(self, device="cpu", layer="hidden", layer_idx=-2, dtype=None, attention_mask=True, model_options={}): super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"start": 2, "pad": 0}, layer_norm_hidden_state=False, model_class=comfy.text_encoders.llama.Gemma2_2B, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options) +class Gemma3_4BModel(sd1_clip.SDClipModel): + def __init__(self, device="cpu", layer="hidden", layer_idx=-2, dtype=None, attention_mask=True, model_options={}): + super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"start": 2, "pad": 0}, layer_norm_hidden_state=False, model_class=comfy.text_encoders.llama.Gemma3_4B, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options) class LuminaModel(sd1_clip.SD1ClipModel): - def __init__(self, device="cpu", dtype=None, model_options={}): - super().__init__(device=device, dtype=dtype, name="gemma2_2b", clip_model=Gemma2_2BModel, model_options=model_options) + def __init__(self, device="cpu", dtype=None, model_options={}, name="gemma2_2b", clip_model=Gemma2_2BModel): + super().__init__(device=device, dtype=dtype, name=name, clip_model=clip_model, model_options=model_options) -def te(dtype_llama=None, llama_scaled_fp8=None): +def te(dtype_llama=None, llama_scaled_fp8=None, model_type="gemma2_2b"): + if model_type == "gemma2_2b": + model = Gemma2_2BModel + elif model_type == "gemma3_4b": + model = Gemma3_4BModel + class LuminaTEModel_(LuminaModel): def __init__(self, device="cpu", dtype=None, model_options={}): if llama_scaled_fp8 is not None and "scaled_fp8" not in model_options: @@ -35,5 +53,5 @@ def te(dtype_llama=None, llama_scaled_fp8=None): model_options["scaled_fp8"] = llama_scaled_fp8 if dtype_llama is not None: dtype = dtype_llama - super().__init__(device=device, dtype=dtype, model_options=model_options) + super().__init__(device=device, dtype=dtype, name=model_type, model_options=model_options, clip_model=model) return LuminaTEModel_ diff --git a/comfy_api_nodes/apinode_utils.py b/comfy_api_nodes/apinode_utils.py index 37438f835..5ac3b92aa 100644 --- a/comfy_api_nodes/apinode_utils.py +++ b/comfy_api_nodes/apinode_utils.py @@ -152,7 +152,7 @@ def validate_aspect_ratio( raise TypeError( f"Aspect ratio cannot reduce to any less than {minimum_ratio_str} ({minimum_ratio}), but was {aspect_ratio} ({calculated_ratio})." ) - elif calculated_ratio > maximum_ratio: + if calculated_ratio > maximum_ratio: raise TypeError( f"Aspect ratio cannot reduce to any greater than {maximum_ratio_str} ({maximum_ratio}), but was {aspect_ratio} ({calculated_ratio})." 
        )
diff --git a/comfy_api_nodes/nodes_kling.py b/comfy_api_nodes/nodes_kling.py
index d8646f106..457b43451 100644
--- a/comfy_api_nodes/nodes_kling.py
+++ b/comfy_api_nodes/nodes_kling.py
@@ -10,6 +10,8 @@ from collections.abc import Callable
 import math
 import logging
 
+from typing_extensions import override
+
 import torch
 
 from comfy_api_nodes.apis import (
@@ -63,8 +65,8 @@ from comfy_api_nodes.apinode_utils import (
     upload_video_to_comfyapi,
     upload_audio_to_comfyapi,
     download_url_to_image_tensor,
+    validate_string,
 )
-from comfy_api_nodes.mapper_utils import model_field_to_node_input
 from comfy_api_nodes.util.validation_utils import (
     validate_image_dimensions,
     validate_image_aspect_ratio,
@@ -73,8 +75,7 @@ from comfy_api_nodes.util.validation_utils import (
 )
 from comfy_api.input.basic_types import AudioInput
 from comfy_api.input.video_types import VideoInput
-from comfy_api.input_impl import VideoFromFile
-from comfy.comfy_types.node_typing import IO, InputTypeOptions, ComfyNodeABC
+from comfy_api.latest import ComfyExtension, io as comfy_io
 
 KLING_API_VERSION = "v1"
 PATH_TEXT_TO_VIDEO = f"/proxy/kling/{KLING_API_VERSION}/videos/text2video"
@@ -103,10 +104,113 @@ AVERAGE_DURATION_VIDEO_EXTEND = 320
 R = TypeVar("R")
 
 
-class KlingApiError(Exception):
-    """Base exception for Kling API errors."""
+MODE_TEXT2VIDEO = {
+    "standard mode / 5s duration / kling-v1": ("std", "5", "kling-v1"),
+    "standard mode / 10s duration / kling-v1": ("std", "10", "kling-v1"),
+    "pro mode / 5s duration / kling-v1": ("pro", "5", "kling-v1"),
+    "pro mode / 10s duration / kling-v1": ("pro", "10", "kling-v1"),
+    "standard mode / 5s duration / kling-v1-6": ("std", "5", "kling-v1-6"),
+    "standard mode / 10s duration / kling-v1-6": ("std", "10", "kling-v1-6"),
+    "pro mode / 5s duration / kling-v2-master": ("pro", "5", "kling-v2-master"),
+    "pro mode / 10s duration / kling-v2-master": ("pro", "10", "kling-v2-master"),
+    "standard mode / 5s duration / kling-v2-master": ("std", "5", "kling-v2-master"),
+    "standard mode / 10s duration / kling-v2-master": ("std", "10", "kling-v2-master"),
+    "pro mode / 5s duration / kling-v2-1-master": ("pro", "5", "kling-v2-1-master"),
+    "pro mode / 10s duration / kling-v2-1-master": ("pro", "10", "kling-v2-1-master"),
+    "pro mode / 5s duration / kling-v2-5-turbo": ("pro", "5", "kling-v2-5-turbo"),
+    "pro mode / 10s duration / kling-v2-5-turbo": ("pro", "10", "kling-v2-5-turbo"),
+}
+"""
+Mapping of mode strings to their corresponding (mode, duration, model_name) tuples.
+Only includes config combos supported by the text2video endpoint.
 
-    pass
+See: [Kling API Docs Capability Map](https://app.klingai.com/global/dev/document-api/apiReference/model/skillsMap)
+"""
+
+
+MODE_START_END_FRAME = {
+    "standard mode / 5s duration / kling-v1": ("std", "5", "kling-v1"),
+    "pro mode / 5s duration / kling-v1": ("pro", "5", "kling-v1"),
+    "pro mode / 5s duration / kling-v1-5": ("pro", "5", "kling-v1-5"),
+    "pro mode / 10s duration / kling-v1-5": ("pro", "10", "kling-v1-5"),
+    "pro mode / 5s duration / kling-v1-6": ("pro", "5", "kling-v1-6"),
+    "pro mode / 10s duration / kling-v1-6": ("pro", "10", "kling-v1-6"),
+    "pro mode / 5s duration / kling-v2-1": ("pro", "5", "kling-v2-1"),
+    "pro mode / 10s duration / kling-v2-1": ("pro", "10", "kling-v2-1"),
+}
+"""
+Mapping of mode strings to their corresponding (mode, duration, model_name) tuples.
+Only includes config combos that support the `image_tail` request field.
+ +See: [Kling API Docs Capability Map](https://app.klingai.com/global/dev/document-api/apiReference/model/skillsMap) +""" + + +VOICES_CONFIG = { + # English voices + "Melody": ("girlfriend_4_speech02", "en"), + "Sunny": ("genshin_vindi2", "en"), + "Sage": ("zhinen_xuesheng", "en"), + "Ace": ("AOT", "en"), + "Blossom": ("ai_shatang", "en"), + "Peppy": ("genshin_klee2", "en"), + "Dove": ("genshin_kirara", "en"), + "Shine": ("ai_kaiya", "en"), + "Anchor": ("oversea_male1", "en"), + "Lyric": ("ai_chenjiahao_712", "en"), + "Tender": ("chat1_female_new-3", "en"), + "Siren": ("chat_0407_5-1", "en"), + "Zippy": ("cartoon-boy-07", "en"), + "Bud": ("uk_boy1", "en"), + "Sprite": ("cartoon-girl-01", "en"), + "Candy": ("PeppaPig_platform", "en"), + "Beacon": ("ai_huangzhong_712", "en"), + "Rock": ("ai_huangyaoshi_712", "en"), + "Titan": ("ai_laoguowang_712", "en"), + "Grace": ("chengshu_jiejie", "en"), + "Helen": ("you_pingjing", "en"), + "Lore": ("calm_story1", "en"), + "Crag": ("uk_man2", "en"), + "Prattle": ("laopopo_speech02", "en"), + "Hearth": ("heainainai_speech02", "en"), + "The Reader": ("reader_en_m-v1", "en"), + "Commercial Lady": ("commercial_lady_en_f-v1", "en"), + # Chinese voices + "阳光少年": ("genshin_vindi2", "zh"), + "懂事小弟": ("zhinen_xuesheng", "zh"), + "运动少年": ("tiyuxi_xuedi", "zh"), + "青春少女": ("ai_shatang", "zh"), + "温柔小妹": ("genshin_klee2", "zh"), + "元气少女": ("genshin_kirara", "zh"), + "阳光男生": ("ai_kaiya", "zh"), + "幽默小哥": ("tiexin_nanyou", "zh"), + "文艺小哥": ("ai_chenjiahao_712", "zh"), + "甜美邻家": ("girlfriend_1_speech02", "zh"), + "温柔姐姐": ("chat1_female_new-3", "zh"), + "职场女青": ("girlfriend_2_speech02", "zh"), + "活泼男童": ("cartoon-boy-07", "zh"), + "俏皮女童": ("cartoon-girl-01", "zh"), + "稳重老爸": ("ai_huangyaoshi_712", "zh"), + "温柔妈妈": ("you_pingjing", "zh"), + "严肃上司": ("ai_laoguowang_712", "zh"), + "优雅贵妇": ("chengshu_jiejie", "zh"), + "慈祥爷爷": ("zhuxi_speech02", "zh"), + "唠叨爷爷": ("uk_oldman3", "zh"), + "唠叨奶奶": ("laopopo_speech02", "zh"), + "和蔼奶奶": ("heainainai_speech02", "zh"), + "东北老铁": ("dongbeilaotie_speech02", "zh"), + "重庆小伙": ("chongqingxiaohuo_speech02", "zh"), + "四川妹子": ("chuanmeizi_speech02", "zh"), + "潮汕大叔": ("chaoshandashu_speech02", "zh"), + "台湾男生": ("ai_taiwan_man2_speech02", "zh"), + "西安掌柜": ("xianzhanggui_speech02", "zh"), + "天津姐姐": ("tianjinjiejie_speech02", "zh"), + "新闻播报男": ("diyinnansang_DB_CN_M_04-v2", "zh"), + "译制片男": ("yizhipiannan-v1", "zh"), + "撒娇女友": ("tianmeixuemei-v1", "zh"), + "刀片烟嗓": ("daopianyansang-v1", "zh"), + "乖巧正太": ("mengwa-v1", "zh"), +} async def poll_until_finished( @@ -142,11 +246,6 @@ def is_valid_camera_control_configs(configs: list[float]) -> bool: return any(not math.isclose(value, 0.0) for value in configs) -def is_valid_prompt(prompt: str) -> bool: - """Verifies that the prompt is not empty.""" - return bool(prompt) - - def is_valid_task_creation_response(response: KlingText2VideoResponse) -> bool: """Verifies that the initial response contains a task ID.""" return bool(response.data.task_id) @@ -190,7 +289,7 @@ def validate_task_creation_response(response) -> None: if not is_valid_task_creation_response(response): error_msg = f"Kling initial request failed. 
Code: {response.code}, Message: {response.message}, Data: {response.data}" logging.error(error_msg) - raise KlingApiError(error_msg) + raise Exception(error_msg) def validate_video_result_response(response) -> None: @@ -198,7 +297,7 @@ def validate_video_result_response(response) -> None: if not is_valid_video_response(response): error_msg = f"Kling task {response.data.task_id} succeeded but no video data found in response." logging.error(f"Error: {error_msg}.\nResponse: {response}") - raise KlingApiError(error_msg) + raise Exception(error_msg) def validate_image_result_response(response) -> None: @@ -206,7 +305,7 @@ def validate_image_result_response(response) -> None: if not is_valid_image_response(response): error_msg = f"Kling task {response.data.task_id} succeeded but no image data found in response." logging.error(f"Error: {error_msg}.\nResponse: {response}") - raise KlingApiError(error_msg) + raise Exception(error_msg) def validate_input_image(image: torch.Tensor) -> None: @@ -221,21 +320,6 @@ def validate_input_image(image: torch.Tensor) -> None: validate_image_aspect_ratio(image, min_aspect_ratio=1 / 2.5, max_aspect_ratio=2.5) -def get_camera_control_input_config( - tooltip: str, default: float = 0.0 -) -> tuple[IO, InputTypeOptions]: - """Returns common InputTypeOptions for Kling camera control configurations.""" - input_config = { - "default": default, - "min": -10.0, - "max": 10.0, - "step": 0.25, - "display": "slider", - "tooltip": tooltip, - } - return IO.FLOAT, input_config - - def get_video_from_response(response) -> KlingVideoResult: """Returns the first video object from the Kling video generation task result. Will raise an error if the response is not valid. @@ -278,17 +362,6 @@ def get_images_urls_from_response(response) -> Optional[str]: return None -async def video_result_to_node_output( - video: KlingVideoResult, -) -> tuple[VideoFromFile, str, str]: - """Converts a KlingVideoResult to a tuple of (VideoFromFile, str, str) to be used as a ComfyUI node output.""" - return ( - await download_url_to_video_output(str(video.url)), - str(video.id), - str(video.duration), - ) - - async def image_result_to_node_output( images: list[KlingImageResult], ) -> torch.Tensor: @@ -302,57 +375,339 @@ async def image_result_to_node_output( return torch.cat([await download_url_to_image_tensor(str(image.url)) for image in images]) -class KlingNodeBase(ComfyNodeABC): - """Base class for Kling nodes.""" +async def execute_text2video( + auth_kwargs: dict[str, str], + node_id: str, + prompt: str, + negative_prompt: str, + cfg_scale: float, + model_name: str, + model_mode: str, + duration: str, + aspect_ratio: str, + camera_control: Optional[KlingCameraControl] = None, +) -> comfy_io.NodeOutput: + validate_prompts(prompt, negative_prompt, MAX_PROMPT_LENGTH_T2V) + initial_operation = SynchronousOperation( + endpoint=ApiEndpoint( + path=PATH_TEXT_TO_VIDEO, + method=HttpMethod.POST, + request_model=KlingText2VideoRequest, + response_model=KlingText2VideoResponse, + ), + request=KlingText2VideoRequest( + prompt=prompt if prompt else None, + negative_prompt=negative_prompt if negative_prompt else None, + duration=KlingVideoGenDuration(duration), + mode=KlingVideoGenMode(model_mode), + model_name=KlingVideoGenModelName(model_name), + cfg_scale=cfg_scale, + aspect_ratio=KlingVideoGenAspectRatio(aspect_ratio), + camera_control=camera_control, + ), + auth_kwargs=auth_kwargs, + ) - FUNCTION = "api_call" - CATEGORY = "api node/video/Kling" - API_NODE = True + task_creation_response = await 
initial_operation.execute() + validate_task_creation_response(task_creation_response) + + task_id = task_creation_response.data.task_id + final_response = await poll_until_finished( + auth_kwargs, + ApiEndpoint( + path=f"{PATH_TEXT_TO_VIDEO}/{task_id}", + method=HttpMethod.GET, + request_model=EmptyRequest, + response_model=KlingText2VideoResponse, + ), + result_url_extractor=get_video_url_from_response, + estimated_duration=AVERAGE_DURATION_T2V, + node_id=node_id, + ) + validate_video_result_response(final_response) + + video = get_video_from_response(final_response) + return comfy_io.NodeOutput(await download_url_to_video_output(str(video.url)), str(video.id), str(video.duration)) -class KlingCameraControls(KlingNodeBase): +async def execute_image2video( + auth_kwargs: dict[str, str], + node_id: str, + start_frame: torch.Tensor, + prompt: str, + negative_prompt: str, + model_name: str, + cfg_scale: float, + model_mode: str, + aspect_ratio: str, + duration: str, + camera_control: Optional[KlingCameraControl] = None, + end_frame: Optional[torch.Tensor] = None, +) -> comfy_io.NodeOutput: + validate_prompts(prompt, negative_prompt, MAX_PROMPT_LENGTH_I2V) + validate_input_image(start_frame) + + if camera_control is not None: + # Camera control type for image 2 video is always `simple` + camera_control.type = KlingCameraControlType.simple + + if model_mode == "std" and model_name == KlingVideoGenModelName.kling_v2_5_turbo.value: + model_mode = "pro" # October 5: currently "std" mode is not supported for this model + + initial_operation = SynchronousOperation( + endpoint=ApiEndpoint( + path=PATH_IMAGE_TO_VIDEO, + method=HttpMethod.POST, + request_model=KlingImage2VideoRequest, + response_model=KlingImage2VideoResponse, + ), + request=KlingImage2VideoRequest( + model_name=KlingVideoGenModelName(model_name), + image=tensor_to_base64_string(start_frame), + image_tail=( + tensor_to_base64_string(end_frame) + if end_frame is not None + else None + ), + prompt=prompt, + negative_prompt=negative_prompt if negative_prompt else None, + cfg_scale=cfg_scale, + mode=KlingVideoGenMode(model_mode), + duration=KlingVideoGenDuration(duration), + camera_control=camera_control, + ), + auth_kwargs=auth_kwargs, + ) + + task_creation_response = await initial_operation.execute() + validate_task_creation_response(task_creation_response) + task_id = task_creation_response.data.task_id + + final_response = await poll_until_finished( + auth_kwargs, + ApiEndpoint( + path=f"{PATH_IMAGE_TO_VIDEO}/{task_id}", + method=HttpMethod.GET, + request_model=KlingImage2VideoRequest, + response_model=KlingImage2VideoResponse, + ), + result_url_extractor=get_video_url_from_response, + estimated_duration=AVERAGE_DURATION_I2V, + node_id=node_id, + ) + validate_video_result_response(final_response) + + video = get_video_from_response(final_response) + return comfy_io.NodeOutput(await download_url_to_video_output(str(video.url)), str(video.id), str(video.duration)) + + +async def execute_video_effect( + auth_kwargs: dict[str, str], + node_id: str, + dual_character: bool, + effect_scene: KlingDualCharacterEffectsScene | KlingSingleImageEffectsScene, + model_name: str, + duration: KlingVideoGenDuration, + image_1: torch.Tensor, + image_2: Optional[torch.Tensor] = None, + model_mode: Optional[KlingVideoGenMode] = None, +) -> comfy_io.NodeOutput: + if dual_character: + request_input_field = KlingDualCharacterEffectInput( + model_name=model_name, + mode=model_mode, + images=[ + tensor_to_base64_string(image_1), + 
tensor_to_base64_string(image_2), + ], + duration=duration, + ) + else: + request_input_field = KlingSingleImageEffectInput( + model_name=model_name, + image=tensor_to_base64_string(image_1), + duration=duration, + ) + + initial_operation = SynchronousOperation( + endpoint=ApiEndpoint( + path=PATH_VIDEO_EFFECTS, + method=HttpMethod.POST, + request_model=KlingVideoEffectsRequest, + response_model=KlingVideoEffectsResponse, + ), + request=KlingVideoEffectsRequest( + effect_scene=effect_scene, + input=request_input_field, + ), + auth_kwargs=auth_kwargs, + ) + + task_creation_response = await initial_operation.execute() + validate_task_creation_response(task_creation_response) + task_id = task_creation_response.data.task_id + + final_response = await poll_until_finished( + auth_kwargs, + ApiEndpoint( + path=f"{PATH_VIDEO_EFFECTS}/{task_id}", + method=HttpMethod.GET, + request_model=EmptyRequest, + response_model=KlingVideoEffectsResponse, + ), + result_url_extractor=get_video_url_from_response, + estimated_duration=AVERAGE_DURATION_VIDEO_EFFECTS, + node_id=node_id, + ) + validate_video_result_response(final_response) + + video = get_video_from_response(final_response) + return comfy_io.NodeOutput(await download_url_to_video_output(str(video.url)), str(video.id), str(video.duration)) + + +async def execute_lipsync( + auth_kwargs: dict[str, str], + node_id: str, + video: VideoInput, + audio: Optional[AudioInput] = None, + voice_language: Optional[str] = None, + model_mode: Optional[str] = None, + text: Optional[str] = None, + voice_speed: Optional[float] = None, + voice_id: Optional[str] = None, +) -> comfy_io.NodeOutput: + if text: + validate_string(text, field_name="Text", max_length=MAX_PROMPT_LENGTH_LIP_SYNC) + validate_video_dimensions(video, 720, 1920) + validate_video_duration(video, 2, 10) + + # Upload video to Comfy API and get download URL + video_url = await upload_video_to_comfyapi(video, auth_kwargs=auth_kwargs) + logging.info("Uploaded video to Comfy API. URL: %s", video_url) + + # Upload the audio file to Comfy API and get download URL + if audio: + audio_url = await upload_audio_to_comfyapi(audio, auth_kwargs=auth_kwargs) + logging.info("Uploaded audio to Comfy API. 
URL: %s", audio_url) + else: + audio_url = None + + initial_operation = SynchronousOperation( + endpoint=ApiEndpoint( + path=PATH_LIP_SYNC, + method=HttpMethod.POST, + request_model=KlingLipSyncRequest, + response_model=KlingLipSyncResponse, + ), + request=KlingLipSyncRequest( + input=KlingLipSyncInputObject( + video_url=video_url, + mode=model_mode, + text=text, + voice_language=voice_language, + voice_speed=voice_speed, + audio_type="url", + audio_url=audio_url, + voice_id=voice_id, + ), + ), + auth_kwargs=auth_kwargs, + ) + + task_creation_response = await initial_operation.execute() + validate_task_creation_response(task_creation_response) + task_id = task_creation_response.data.task_id + + final_response = await poll_until_finished( + auth_kwargs, + ApiEndpoint( + path=f"{PATH_LIP_SYNC}/{task_id}", + method=HttpMethod.GET, + request_model=EmptyRequest, + response_model=KlingLipSyncResponse, + ), + result_url_extractor=get_video_url_from_response, + estimated_duration=AVERAGE_DURATION_LIP_SYNC, + node_id=node_id, + ) + validate_video_result_response(final_response) + + video = get_video_from_response(final_response) + return comfy_io.NodeOutput(await download_url_to_video_output(str(video.url)), str(video.id), str(video.duration)) + + +class KlingCameraControls(comfy_io.ComfyNode): """Kling Camera Controls Node""" @classmethod - def INPUT_TYPES(cls): - return { - "required": { - "camera_control_type": model_field_to_node_input( - IO.COMBO, - KlingCameraControl, - "type", - enum_type=KlingCameraControlType, + def define_schema(cls) -> comfy_io.Schema: + return comfy_io.Schema( + node_id="KlingCameraControls", + display_name="Kling Camera Controls", + category="api node/video/Kling", + description="Allows specifying configuration options for Kling Camera Controls and motion control effects.", + inputs=[ + comfy_io.Combo.Input("camera_control_type", options=[i.value for i in KlingCameraControlType]), + comfy_io.Float.Input( + "horizontal_movement", + default=0.0, + min=-10.0, + max=10.0, + step=0.25, + display_mode=comfy_io.NumberDisplay.slider, + tooltip="Controls camera's movement along horizontal axis (x-axis). Negative indicates left, positive indicates right", ), - "horizontal_movement": get_camera_control_input_config( - "Controls camera's movement along horizontal axis (x-axis). Negative indicates left, positive indicates right" + comfy_io.Float.Input( + "vertical_movement", + default=0.0, + min=-10.0, + max=10.0, + step=0.25, + display_mode=comfy_io.NumberDisplay.slider, + tooltip="Controls camera's movement along vertical axis (y-axis). Negative indicates downward, positive indicates upward.", ), - "vertical_movement": get_camera_control_input_config( - "Controls camera's movement along vertical axis (y-axis). Negative indicates downward, positive indicates upward." - ), - "pan": get_camera_control_input_config( - "Controls camera's rotation in vertical plane (x-axis). Negative indicates downward rotation, positive indicates upward rotation.", + comfy_io.Float.Input( + "pan", default=0.5, + min=-10.0, + max=10.0, + step=0.25, + display_mode=comfy_io.NumberDisplay.slider, + tooltip="Controls camera's rotation in vertical plane (x-axis). Negative indicates downward rotation, positive indicates upward rotation.", ), - "tilt": get_camera_control_input_config( - "Controls camera's rotation in horizontal plane (y-axis). 
Negative indicates left rotation, positive indicates right rotation.", + comfy_io.Float.Input( + "tilt", + default=0.0, + min=-10.0, + max=10.0, + step=0.25, + display_mode=comfy_io.NumberDisplay.slider, + tooltip="Controls camera's rotation in horizontal plane (y-axis). Negative indicates left rotation, positive indicates right rotation.", ), - "roll": get_camera_control_input_config( - "Controls camera's rolling amount (z-axis). Negative indicates counterclockwise, positive indicates clockwise.", + comfy_io.Float.Input( + "roll", + default=0.0, + min=-10.0, + max=10.0, + step=0.25, + display_mode=comfy_io.NumberDisplay.slider, + tooltip="Controls camera's rolling amount (z-axis). Negative indicates counterclockwise, positive indicates clockwise.", ), - "zoom": get_camera_control_input_config( - "Controls change in camera's focal length. Negative indicates narrower field of view, positive indicates wider field of view.", + comfy_io.Float.Input( + "zoom", + default=0.0, + min=-10.0, + max=10.0, + step=0.25, + display_mode=comfy_io.NumberDisplay.slider, + tooltip="Controls change in camera's focal length. Negative indicates narrower field of view, positive indicates wider field of view.", ), - } - } - - DESCRIPTION = "Allows specifying configuration options for Kling Camera Controls and motion control effects." - RETURN_TYPES = ("CAMERA_CONTROL",) - RETURN_NAMES = ("camera_control",) - FUNCTION = "main" - API_NODE = False # This is just a helper node, it doesn't make an API call + ], + outputs=[comfy_io.Custom("CAMERA_CONTROL").Output(display_name="camera_control")], + ) @classmethod - def VALIDATE_INPUTS( + def validate_inputs( cls, horizontal_movement: float, vertical_movement: float, @@ -374,8 +729,9 @@ class KlingCameraControls(KlingNodeBase): return "Invalid camera control configs: at least one of the values must be non-zero" return True - def main( - self, + @classmethod + def execute( + cls, camera_control_type: str, horizontal_movement: float, vertical_movement: float, @@ -383,8 +739,8 @@ class KlingCameraControls(KlingNodeBase): tilt: float, roll: float, zoom: float, - ) -> tuple[KlingCameraControl]: - return ( + ) -> comfy_io.NodeOutput: + return comfy_io.NodeOutput( KlingCameraControl( type=KlingCameraControlType(camera_control_type), config=KlingCameraConfig( @@ -395,303 +751,186 @@ class KlingCameraControls(KlingNodeBase): tilt=tilt, zoom=zoom, ), - ), + ) ) -class KlingTextToVideoNode(KlingNodeBase): +class KlingTextToVideoNode(comfy_io.ComfyNode): """Kling Text to Video Node""" - @staticmethod - def get_mode_string_mapping() -> dict[str, tuple[str, str, str]]: - """ - Returns a mapping of mode strings to their corresponding (mode, duration, model_name) tuples. - Only includes config combos that support the `image_tail` request field. 
- - See: [Kling API Docs Capability Map](https://app.klingai.com/global/dev/document-api/apiReference/model/skillsMap) - """ - return { - "standard mode / 5s duration / kling-v1": ("std", "5", "kling-v1"), - "standard mode / 10s duration / kling-v1": ("std", "10", "kling-v1"), - "pro mode / 5s duration / kling-v1": ("pro", "5", "kling-v1"), - "pro mode / 10s duration / kling-v1": ("pro", "10", "kling-v1"), - "standard mode / 5s duration / kling-v1-6": ("std", "5", "kling-v1-6"), - "standard mode / 10s duration / kling-v1-6": ("std", "10", "kling-v1-6"), - "pro mode / 5s duration / kling-v2-master": ("pro", "5", "kling-v2-master"), - "pro mode / 10s duration / kling-v2-master": ("pro", "10", "kling-v2-master"), - "standard mode / 5s duration / kling-v2-master": ("std", "5", "kling-v2-master"), - "standard mode / 10s duration / kling-v2-master": ("std", "10", "kling-v2-master"), - "pro mode / 5s duration / kling-v2-1-master": ("pro", "5", "kling-v2-1-master"), - "pro mode / 10s duration / kling-v2-1-master": ("pro", "10", "kling-v2-1-master"), - "pro mode / 5s duration / kling-v2-5-turbo": ("pro", "5", "kling-v2-5-turbo"), - "pro mode / 10s duration / kling-v2-5-turbo": ("pro", "10", "kling-v2-5-turbo"), - } - @classmethod - def INPUT_TYPES(s): - modes = list(KlingTextToVideoNode.get_mode_string_mapping().keys()) - return { - "required": { - "prompt": model_field_to_node_input( - IO.STRING, KlingText2VideoRequest, "prompt", multiline=True - ), - "negative_prompt": model_field_to_node_input( - IO.STRING, KlingText2VideoRequest, "negative_prompt", multiline=True - ), - "cfg_scale": model_field_to_node_input( - IO.FLOAT, - KlingText2VideoRequest, - "cfg_scale", - default=1.0, - min=0.0, - max=1.0, - ), - "aspect_ratio": model_field_to_node_input( - IO.COMBO, - KlingText2VideoRequest, + def define_schema(cls) -> comfy_io.Schema: + modes = list(MODE_TEXT2VIDEO.keys()) + return comfy_io.Schema( + node_id="KlingTextToVideoNode", + display_name="Kling Text to Video", + category="api node/video/Kling", + description="Kling Text to Video Node", + inputs=[ + comfy_io.String.Input("prompt", multiline=True, tooltip="Positive text prompt"), + comfy_io.String.Input("negative_prompt", multiline=True, tooltip="Negative text prompt"), + comfy_io.Float.Input("cfg_scale", default=1.0, min=0.0, max=1.0), + comfy_io.Combo.Input( "aspect_ratio", - enum_type=KlingVideoGenAspectRatio, + options=[i.value for i in KlingVideoGenAspectRatio], + default="16:9", ), - "mode": ( - modes, - { - "default": modes[4], - "tooltip": "The configuration to use for the video generation following the format: mode / duration / model_name.", - }, + comfy_io.Combo.Input( + "mode", + options=modes, + default=modes[4], + tooltip="The configuration to use for the video generation following the format: mode / duration / model_name.", ), - }, - "hidden": { - "auth_token": "AUTH_TOKEN_COMFY_ORG", - "comfy_api_key": "API_KEY_COMFY_ORG", - "unique_id": "UNIQUE_ID", - }, - } - - RETURN_TYPES = ("VIDEO", "STRING", "STRING") - RETURN_NAMES = ("VIDEO", "video_id", "duration") - DESCRIPTION = "Kling Text to Video Node" - - async def get_response( - self, task_id: str, auth_kwargs: dict[str, str], node_id: Optional[str] = None - ) -> KlingText2VideoResponse: - return await poll_until_finished( - auth_kwargs, - ApiEndpoint( - path=f"{PATH_TEXT_TO_VIDEO}/{task_id}", - method=HttpMethod.GET, - request_model=EmptyRequest, - response_model=KlingText2VideoResponse, - ), - result_url_extractor=get_video_url_from_response, - 
estimated_duration=AVERAGE_DURATION_T2V, - node_id=node_id, + ], + outputs=[ + comfy_io.Video.Output(), + comfy_io.String.Output(display_name="video_id"), + comfy_io.String.Output(display_name="duration"), + ], + hidden=[ + comfy_io.Hidden.auth_token_comfy_org, + comfy_io.Hidden.api_key_comfy_org, + comfy_io.Hidden.unique_id, + ], + is_api_node=True, ) - async def api_call( - self, + @classmethod + async def execute( + cls, prompt: str, negative_prompt: str, cfg_scale: float, mode: str, aspect_ratio: str, - camera_control: Optional[KlingCameraControl] = None, - model_name: Optional[str] = None, - duration: Optional[str] = None, - unique_id: Optional[str] = None, - **kwargs, - ) -> tuple[VideoFromFile, str, str]: - validate_prompts(prompt, negative_prompt, MAX_PROMPT_LENGTH_T2V) - if model_name is None: - mode, duration, model_name = self.get_mode_string_mapping()[mode] - initial_operation = SynchronousOperation( - endpoint=ApiEndpoint( - path=PATH_TEXT_TO_VIDEO, - method=HttpMethod.POST, - request_model=KlingText2VideoRequest, - response_model=KlingText2VideoResponse, - ), - request=KlingText2VideoRequest( - prompt=prompt if prompt else None, - negative_prompt=negative_prompt if negative_prompt else None, - duration=KlingVideoGenDuration(duration), - mode=KlingVideoGenMode(mode), - model_name=KlingVideoGenModelName(model_name), - cfg_scale=cfg_scale, - aspect_ratio=KlingVideoGenAspectRatio(aspect_ratio), - camera_control=camera_control, - ), - auth_kwargs=kwargs, + ) -> comfy_io.NodeOutput: + model_mode, duration, model_name = MODE_TEXT2VIDEO[mode] + return await execute_text2video( + auth_kwargs={ + "auth_token": cls.hidden.auth_token_comfy_org, + "comfy_api_key": cls.hidden.api_key_comfy_org, + }, + node_id=cls.hidden.unique_id, + prompt=prompt, + negative_prompt=negative_prompt, + cfg_scale=cfg_scale, + model_mode=model_mode, + aspect_ratio=aspect_ratio, + model_name=model_name, + duration=duration, ) - task_creation_response = await initial_operation.execute() - validate_task_creation_response(task_creation_response) - task_id = task_creation_response.data.task_id - final_response = await self.get_response( - task_id, auth_kwargs=kwargs, node_id=unique_id - ) - validate_video_result_response(final_response) - - video = get_video_from_response(final_response) - return await video_result_to_node_output(video) - - -class KlingCameraControlT2VNode(KlingTextToVideoNode): +class KlingCameraControlT2VNode(comfy_io.ComfyNode): """ Kling Text to Video Camera Control Node. This node is a text to video node, but it supports controlling the camera. Duration, mode, and model_name request fields are hard-coded because camera control is only supported in pro mode with the kling-v1-5 model at 5s duration as of 2025-05-02. 
""" @classmethod - def INPUT_TYPES(s): - return { - "required": { - "prompt": model_field_to_node_input( - IO.STRING, KlingText2VideoRequest, "prompt", multiline=True - ), - "negative_prompt": model_field_to_node_input( - IO.STRING, - KlingText2VideoRequest, - "negative_prompt", - multiline=True, - ), - "cfg_scale": model_field_to_node_input( - IO.FLOAT, - KlingText2VideoRequest, - "cfg_scale", - default=0.75, - min=0.0, - max=1.0, - ), - "aspect_ratio": model_field_to_node_input( - IO.COMBO, - KlingText2VideoRequest, + def define_schema(cls) -> comfy_io.Schema: + return comfy_io.Schema( + node_id="KlingCameraControlT2VNode", + display_name="Kling Text to Video (Camera Control)", + category="api node/video/Kling", + description="Transform text into cinematic videos with professional camera movements that simulate real-world cinematography. Control virtual camera actions including zoom, rotation, pan, tilt, and first-person view, while maintaining focus on your original text.", + inputs=[ + comfy_io.String.Input("prompt", multiline=True, tooltip="Positive text prompt"), + comfy_io.String.Input("negative_prompt", multiline=True, tooltip="Negative text prompt"), + comfy_io.Float.Input("cfg_scale", default=0.75, min=0.0, max=1.0), + comfy_io.Combo.Input( "aspect_ratio", - enum_type=KlingVideoGenAspectRatio, + options=[i.value for i in KlingVideoGenAspectRatio], + default="16:9", ), - "camera_control": ( - "CAMERA_CONTROL", - { - "tooltip": "Can be created using the Kling Camera Controls node. Controls the camera movement and motion during the video generation.", - }, + comfy_io.Custom("CAMERA_CONTROL").Input( + "camera_control", + tooltip="Can be created using the Kling Camera Controls node. Controls the camera movement and motion during the video generation.", ), - }, - "hidden": { - "auth_token": "AUTH_TOKEN_COMFY_ORG", - "comfy_api_key": "API_KEY_COMFY_ORG", - "unique_id": "UNIQUE_ID", - }, - } + ], + outputs=[ + comfy_io.Video.Output(), + comfy_io.String.Output(display_name="video_id"), + comfy_io.String.Output(display_name="duration"), + ], + hidden=[ + comfy_io.Hidden.auth_token_comfy_org, + comfy_io.Hidden.api_key_comfy_org, + comfy_io.Hidden.unique_id, + ], + is_api_node=True, + ) - DESCRIPTION = "Transform text into cinematic videos with professional camera movements that simulate real-world cinematography. Control virtual camera actions including zoom, rotation, pan, tilt, and first-person view, while maintaining focus on your original text." 
- - async def api_call( - self, + @classmethod + async def execute( + cls, prompt: str, negative_prompt: str, cfg_scale: float, aspect_ratio: str, camera_control: Optional[KlingCameraControl] = None, - unique_id: Optional[str] = None, - **kwargs, - ): - return await super().api_call( + ) -> comfy_io.NodeOutput: + return await execute_text2video( + auth_kwargs={ + "auth_token": cls.hidden.auth_token_comfy_org, + "comfy_api_key": cls.hidden.api_key_comfy_org, + }, + node_id=cls.hidden.unique_id, model_name=KlingVideoGenModelName.kling_v1, cfg_scale=cfg_scale, - mode=KlingVideoGenMode.std, + model_mode=KlingVideoGenMode.std, aspect_ratio=KlingVideoGenAspectRatio(aspect_ratio), duration=KlingVideoGenDuration.field_5, prompt=prompt, negative_prompt=negative_prompt, camera_control=camera_control, - **kwargs, ) -class KlingImage2VideoNode(KlingNodeBase): +class KlingImage2VideoNode(comfy_io.ComfyNode): """Kling Image to Video Node""" @classmethod - def INPUT_TYPES(s): - return { - "required": { - "start_frame": model_field_to_node_input( - IO.IMAGE, - KlingImage2VideoRequest, - "image", - tooltip="The reference image used to generate the video.", - ), - "prompt": model_field_to_node_input( - IO.STRING, KlingImage2VideoRequest, "prompt", multiline=True - ), - "negative_prompt": model_field_to_node_input( - IO.STRING, - KlingImage2VideoRequest, - "negative_prompt", - multiline=True, - ), - "model_name": model_field_to_node_input( - IO.COMBO, - KlingImage2VideoRequest, + def define_schema(cls) -> comfy_io.Schema: + return comfy_io.Schema( + node_id="KlingImage2VideoNode", + display_name="Kling Image to Video", + category="api node/video/Kling", + description="Kling Image to Video Node", + inputs=[ + comfy_io.Image.Input("start_frame", tooltip="The reference image used to generate the video."), + comfy_io.String.Input("prompt", multiline=True, tooltip="Positive text prompt"), + comfy_io.String.Input("negative_prompt", multiline=True, tooltip="Negative text prompt"), + comfy_io.Combo.Input( "model_name", - enum_type=KlingVideoGenModelName, + options=[i.value for i in KlingVideoGenModelName], + default="kling-v2-master", ), - "cfg_scale": model_field_to_node_input( - IO.FLOAT, - KlingImage2VideoRequest, - "cfg_scale", - default=0.8, - min=0.0, - max=1.0, - ), - "mode": model_field_to_node_input( - IO.COMBO, - KlingImage2VideoRequest, - "mode", - enum_type=KlingVideoGenMode, - ), - "aspect_ratio": model_field_to_node_input( - IO.COMBO, - KlingImage2VideoRequest, + comfy_io.Float.Input("cfg_scale", default=0.8, min=0.0, max=1.0), + comfy_io.Combo.Input("mode", options=[i.value for i in KlingVideoGenMode], default="std"), + comfy_io.Combo.Input( "aspect_ratio", - enum_type=KlingVideoGenAspectRatio, + options=[i.value for i in KlingVideoGenAspectRatio], + default="16:9", ), - "duration": model_field_to_node_input( - IO.COMBO, - KlingImage2VideoRequest, - "duration", - enum_type=KlingVideoGenDuration, - ), - }, - "hidden": { - "auth_token": "AUTH_TOKEN_COMFY_ORG", - "comfy_api_key": "API_KEY_COMFY_ORG", - "unique_id": "UNIQUE_ID", - }, - } - - RETURN_TYPES = ("VIDEO", "STRING", "STRING") - RETURN_NAMES = ("VIDEO", "video_id", "duration") - DESCRIPTION = "Kling Image to Video Node" - - async def get_response( - self, task_id: str, auth_kwargs: dict[str, str], node_id: Optional[str] = None - ) -> KlingImage2VideoResponse: - return await poll_until_finished( - auth_kwargs, - ApiEndpoint( - path=f"{PATH_IMAGE_TO_VIDEO}/{task_id}", - method=HttpMethod.GET, - request_model=KlingImage2VideoRequest, - 
response_model=KlingImage2VideoResponse, - ), - result_url_extractor=get_video_url_from_response, - estimated_duration=AVERAGE_DURATION_I2V, - node_id=node_id, + comfy_io.Combo.Input("duration", options=[i.value for i in KlingVideoGenDuration], default="5"), + ], + outputs=[ + comfy_io.Video.Output(), + comfy_io.String.Output(display_name="video_id"), + comfy_io.String.Output(display_name="duration"), + ], + hidden=[ + comfy_io.Hidden.auth_token_comfy_org, + comfy_io.Hidden.api_key_comfy_org, + comfy_io.Hidden.unique_id, + ], + is_api_node=True, ) - async def api_call( - self, + @classmethod + async def execute( + cls, start_frame: torch.Tensor, prompt: str, negative_prompt: str, @@ -702,209 +941,151 @@ class KlingImage2VideoNode(KlingNodeBase): duration: str, camera_control: Optional[KlingCameraControl] = None, end_frame: Optional[torch.Tensor] = None, - unique_id: Optional[str] = None, - **kwargs, - ) -> tuple[VideoFromFile]: - validate_prompts(prompt, negative_prompt, MAX_PROMPT_LENGTH_I2V) - validate_input_image(start_frame) - - if camera_control is not None: - # Camera control type for image 2 video is always `simple` - camera_control.type = KlingCameraControlType.simple - - initial_operation = SynchronousOperation( - endpoint=ApiEndpoint( - path=PATH_IMAGE_TO_VIDEO, - method=HttpMethod.POST, - request_model=KlingImage2VideoRequest, - response_model=KlingImage2VideoResponse, - ), - request=KlingImage2VideoRequest( - model_name=KlingVideoGenModelName(model_name), - image=tensor_to_base64_string(start_frame), - image_tail=( - tensor_to_base64_string(end_frame) - if end_frame is not None - else None - ), - prompt=prompt, - negative_prompt=negative_prompt if negative_prompt else None, - cfg_scale=cfg_scale, - mode=KlingVideoGenMode(mode), - duration=KlingVideoGenDuration(duration), - camera_control=camera_control, - ), - auth_kwargs=kwargs, + ) -> comfy_io.NodeOutput: + return await execute_image2video( + auth_kwargs={ + "auth_token": cls.hidden.auth_token_comfy_org, + "comfy_api_key": cls.hidden.api_key_comfy_org, + }, + node_id=cls.hidden.unique_id, + start_frame=start_frame, + prompt=prompt, + negative_prompt=negative_prompt, + cfg_scale=cfg_scale, + model_name=model_name, + aspect_ratio=aspect_ratio, + model_mode=mode, + duration=duration, + camera_control=camera_control, + end_frame=end_frame, ) - task_creation_response = await initial_operation.execute() - validate_task_creation_response(task_creation_response) - task_id = task_creation_response.data.task_id - final_response = await self.get_response( - task_id, auth_kwargs=kwargs, node_id=unique_id - ) - validate_video_result_response(final_response) - - video = get_video_from_response(final_response) - return await video_result_to_node_output(video) - - -class KlingCameraControlI2VNode(KlingImage2VideoNode): +class KlingCameraControlI2VNode(comfy_io.ComfyNode): """ Kling Image to Video Camera Control Node. This node is a image to video node, but it supports controlling the camera. Duration, mode, and model_name request fields are hard-coded because camera control is only supported in pro mode with the kling-v1-5 model at 5s duration as of 2025-05-02. 
""" @classmethod - def INPUT_TYPES(s): - return { - "required": { - "start_frame": model_field_to_node_input( - IO.IMAGE, KlingImage2VideoRequest, "image" + def define_schema(cls) -> comfy_io.Schema: + return comfy_io.Schema( + node_id="KlingCameraControlI2VNode", + display_name="Kling Image to Video (Camera Control)", + category="api node/video/Kling", + description="Transform still images into cinematic videos with professional camera movements that simulate real-world cinematography. Control virtual camera actions including zoom, rotation, pan, tilt, and first-person view, while maintaining focus on your original image.", + inputs=[ + comfy_io.Image.Input( + "start_frame", + tooltip="Reference Image - URL or Base64 encoded string, cannot exceed 10MB, resolution not less than 300*300px, aspect ratio between 1:2.5 ~ 2.5:1. Base64 should not include data:image prefix.", ), - "prompt": model_field_to_node_input( - IO.STRING, KlingImage2VideoRequest, "prompt", multiline=True - ), - "negative_prompt": model_field_to_node_input( - IO.STRING, - KlingImage2VideoRequest, - "negative_prompt", - multiline=True, - ), - "cfg_scale": model_field_to_node_input( - IO.FLOAT, - KlingImage2VideoRequest, - "cfg_scale", - default=0.75, - min=0.0, - max=1.0, - ), - "aspect_ratio": model_field_to_node_input( - IO.COMBO, - KlingImage2VideoRequest, + comfy_io.String.Input("prompt", multiline=True, tooltip="Positive text prompt"), + comfy_io.String.Input("negative_prompt", multiline=True, tooltip="Negative text prompt"), + comfy_io.Float.Input("cfg_scale", default=0.75, min=0.0, max=1.0), + comfy_io.Combo.Input( "aspect_ratio", - enum_type=KlingVideoGenAspectRatio, + options=[i.value for i in KlingVideoGenAspectRatio], + default="16:9", ), - "camera_control": ( - "CAMERA_CONTROL", - { - "tooltip": "Can be created using the Kling Camera Controls node. Controls the camera movement and motion during the video generation.", - }, + comfy_io.Custom("CAMERA_CONTROL").Input( + "camera_control", + tooltip="Can be created using the Kling Camera Controls node. Controls the camera movement and motion during the video generation.", ), - }, - "hidden": { - "auth_token": "AUTH_TOKEN_COMFY_ORG", - "comfy_api_key": "API_KEY_COMFY_ORG", - "unique_id": "UNIQUE_ID", - }, - } + ], + outputs=[ + comfy_io.Video.Output(), + comfy_io.String.Output(display_name="video_id"), + comfy_io.String.Output(display_name="duration"), + ], + hidden=[ + comfy_io.Hidden.auth_token_comfy_org, + comfy_io.Hidden.api_key_comfy_org, + comfy_io.Hidden.unique_id, + ], + is_api_node=True, + ) - DESCRIPTION = "Transform still images into cinematic videos with professional camera movements that simulate real-world cinematography. Control virtual camera actions including zoom, rotation, pan, tilt, and first-person view, while maintaining focus on your original image." 
- - async def api_call( - self, + @classmethod + async def execute( + cls, start_frame: torch.Tensor, prompt: str, negative_prompt: str, cfg_scale: float, aspect_ratio: str, camera_control: KlingCameraControl, - unique_id: Optional[str] = None, - **kwargs, - ): - return await super().api_call( + ) -> comfy_io.NodeOutput: + return await execute_image2video( + auth_kwargs={ + "auth_token": cls.hidden.auth_token_comfy_org, + "comfy_api_key": cls.hidden.api_key_comfy_org, + }, + node_id=cls.hidden.unique_id, model_name=KlingVideoGenModelName.kling_v1_5, start_frame=start_frame, cfg_scale=cfg_scale, - mode=KlingVideoGenMode.pro, + model_mode=KlingVideoGenMode.pro, aspect_ratio=KlingVideoGenAspectRatio(aspect_ratio), duration=KlingVideoGenDuration.field_5, prompt=prompt, negative_prompt=negative_prompt, camera_control=camera_control, - unique_id=unique_id, - **kwargs, ) -class KlingStartEndFrameNode(KlingImage2VideoNode): +class KlingStartEndFrameNode(comfy_io.ComfyNode): """ Kling First Last Frame Node. This node allows creation of a video from a first and last frame. It calls the normal image to video endpoint, but only allows the subset of input options that support the `image_tail` request field. """ - @staticmethod - def get_mode_string_mapping() -> dict[str, tuple[str, str, str]]: - """ - Returns a mapping of mode strings to their corresponding (mode, duration, model_name) tuples. - Only includes config combos that support the `image_tail` request field. - - See: [Kling API Docs Capability Map](https://app.klingai.com/global/dev/document-api/apiReference/model/skillsMap) - """ - return { - "standard mode / 5s duration / kling-v1": ("std", "5", "kling-v1"), - "pro mode / 5s duration / kling-v1": ("pro", "5", "kling-v1"), - "pro mode / 5s duration / kling-v1-5": ("pro", "5", "kling-v1-5"), - "pro mode / 10s duration / kling-v1-5": ("pro", "10", "kling-v1-5"), - "pro mode / 5s duration / kling-v1-6": ("pro", "5", "kling-v1-6"), - "pro mode / 10s duration / kling-v1-6": ("pro", "10", "kling-v1-6"), - "pro mode / 5s duration / kling-v2-1": ("pro", "5", "kling-v2-1"), - "pro mode / 10s duration / kling-v2-1": ("pro", "10", "kling-v2-1"), - } + @classmethod + def define_schema(cls) -> comfy_io.Schema: + modes = list(MODE_START_END_FRAME.keys()) + return comfy_io.Schema( + node_id="KlingStartEndFrameNode", + display_name="Kling Start-End Frame to Video", + category="api node/video/Kling", + description="Generate a video sequence that transitions between your provided start and end images. The node creates all frames in between, producing a smooth transformation from the first frame to the last.", + inputs=[ + comfy_io.Image.Input( + "start_frame", + tooltip="Reference Image - URL or Base64 encoded string, cannot exceed 10MB, resolution not less than 300*300px, aspect ratio between 1:2.5 ~ 2.5:1. Base64 should not include data:image prefix.", + ), + comfy_io.Image.Input( + "end_frame", + tooltip="Reference Image - End frame control. URL or Base64 encoded string, cannot exceed 10MB, resolution not less than 300*300px. 
Base64 should not include data:image prefix.", + ), + comfy_io.String.Input("prompt", multiline=True, tooltip="Positive text prompt"), + comfy_io.String.Input("negative_prompt", multiline=True, tooltip="Negative text prompt"), + comfy_io.Float.Input("cfg_scale", default=0.5, min=0.0, max=1.0), + comfy_io.Combo.Input( + "aspect_ratio", + options=[i.value for i in KlingVideoGenAspectRatio], + default="16:9", + ), + comfy_io.Combo.Input( + "mode", + options=modes, + default=modes[2], + tooltip="The configuration to use for the video generation following the format: mode / duration / model_name.", + ), + ], + outputs=[ + comfy_io.Video.Output(), + comfy_io.String.Output(display_name="video_id"), + comfy_io.String.Output(display_name="duration"), + ], + hidden=[ + comfy_io.Hidden.auth_token_comfy_org, + comfy_io.Hidden.api_key_comfy_org, + comfy_io.Hidden.unique_id, + ], + is_api_node=True, + ) @classmethod - def INPUT_TYPES(s): - modes = list(KlingStartEndFrameNode.get_mode_string_mapping().keys()) - return { - "required": { - "start_frame": model_field_to_node_input( - IO.IMAGE, KlingImage2VideoRequest, "image" - ), - "end_frame": model_field_to_node_input( - IO.IMAGE, KlingImage2VideoRequest, "image_tail" - ), - "prompt": model_field_to_node_input( - IO.STRING, KlingImage2VideoRequest, "prompt", multiline=True - ), - "negative_prompt": model_field_to_node_input( - IO.STRING, - KlingImage2VideoRequest, - "negative_prompt", - multiline=True, - ), - "cfg_scale": model_field_to_node_input( - IO.FLOAT, - KlingImage2VideoRequest, - "cfg_scale", - default=0.5, - min=0.0, - max=1.0, - ), - "aspect_ratio": model_field_to_node_input( - IO.COMBO, - KlingImage2VideoRequest, - "aspect_ratio", - enum_type=KlingVideoGenAspectRatio, - ), - "mode": ( - modes, - { - "default": modes[2], - "tooltip": "The configuration to use for the video generation following the format: mode / duration / model_name.", - }, - ), - }, - "hidden": { - "auth_token": "AUTH_TOKEN_COMFY_ORG", - "comfy_api_key": "API_KEY_COMFY_ORG", - "unique_id": "UNIQUE_ID", - }, - } - - DESCRIPTION = "Generate a video sequence that transitions between your provided start and end images. The node creates all frames in between, producing a smooth transformation from the first frame to the last." 
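# NOTE (editor): `MODE_START_END_FRAME` is a module-level constant defined earlier in
# the file (outside this hunk); it replaces the removed get_mode_string_mapping()
# classmethod. Reconstructed here from that method for review context: the config
# combos that support the `image_tail` request field. The voice table used by the
# lip-sync text node further below is hoisted the same way, as VOICES_CONFIG.
MODE_START_END_FRAME = {
    "standard mode / 5s duration / kling-v1": ("std", "5", "kling-v1"),
    "pro mode / 5s duration / kling-v1": ("pro", "5", "kling-v1"),
    "pro mode / 5s duration / kling-v1-5": ("pro", "5", "kling-v1-5"),
    "pro mode / 10s duration / kling-v1-5": ("pro", "10", "kling-v1-5"),
    "pro mode / 5s duration / kling-v1-6": ("pro", "5", "kling-v1-6"),
    "pro mode / 10s duration / kling-v1-6": ("pro", "10", "kling-v1-6"),
    "pro mode / 5s duration / kling-v2-1": ("pro", "5", "kling-v2-1"),
    "pro mode / 10s duration / kling-v2-1": ("pro", "10", "kling-v2-1"),
}
# e.g. MODE_START_END_FRAME["pro mode / 5s duration / kling-v1-5"] -> ("pro", "5", "kling-v1-5")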
- - async def api_call( - self, + async def execute( + cls, start_frame: torch.Tensor, end_frame: torch.Tensor, prompt: str, @@ -912,90 +1093,78 @@ class KlingStartEndFrameNode(KlingImage2VideoNode): cfg_scale: float, aspect_ratio: str, mode: str, - unique_id: Optional[str] = None, - **kwargs, - ): - mode, duration, model_name = KlingStartEndFrameNode.get_mode_string_mapping()[ - mode - ] - return await super().api_call( + ) -> comfy_io.NodeOutput: + mode, duration, model_name = MODE_START_END_FRAME[mode] + return await execute_image2video( + auth_kwargs={ + "auth_token": cls.hidden.auth_token_comfy_org, + "comfy_api_key": cls.hidden.api_key_comfy_org, + }, + node_id=cls.hidden.unique_id, prompt=prompt, negative_prompt=negative_prompt, model_name=model_name, start_frame=start_frame, cfg_scale=cfg_scale, - mode=mode, + model_mode=mode, aspect_ratio=aspect_ratio, duration=duration, end_frame=end_frame, - unique_id=unique_id, - **kwargs, ) -class KlingVideoExtendNode(KlingNodeBase): +class KlingVideoExtendNode(comfy_io.ComfyNode): @classmethod - def INPUT_TYPES(s): - return { - "required": { - "prompt": model_field_to_node_input( - IO.STRING, KlingVideoExtendRequest, "prompt", multiline=True + def define_schema(cls) -> comfy_io.Schema: + return comfy_io.Schema( + node_id="KlingVideoExtendNode", + display_name="Kling Video Extend", + category="api node/video/Kling", + description="Kling Video Extend Node. Extend videos made by other Kling nodes. The video_id is created by using other Kling Nodes.", + inputs=[ + comfy_io.String.Input( + "prompt", + multiline=True, + tooltip="Positive text prompt for guiding the video extension", ), - "negative_prompt": model_field_to_node_input( - IO.STRING, - KlingVideoExtendRequest, + comfy_io.String.Input( "negative_prompt", multiline=True, + tooltip="Negative text prompt for elements to avoid in the extended video", ), - "cfg_scale": model_field_to_node_input( - IO.FLOAT, - KlingVideoExtendRequest, - "cfg_scale", - default=0.5, - min=0.0, - max=1.0, + comfy_io.Float.Input("cfg_scale", default=0.5, min=0.0, max=1.0), + comfy_io.String.Input( + "video_id", + force_input=True, + tooltip="The ID of the video to be extended. Supports videos generated by text-to-video, image-to-video, and previous video extension operations. Cannot exceed 3 minutes total duration after extension.", ), - "video_id": model_field_to_node_input( - IO.STRING, KlingVideoExtendRequest, "video_id", forceInput=True - ), - }, - "hidden": { - "auth_token": "AUTH_TOKEN_COMFY_ORG", - "comfy_api_key": "API_KEY_COMFY_ORG", - "unique_id": "UNIQUE_ID", - }, - } - - RETURN_TYPES = ("VIDEO", "STRING", "STRING") - RETURN_NAMES = ("VIDEO", "video_id", "duration") - DESCRIPTION = "Kling Video Extend Node. Extend videos made by other Kling nodes. The video_id is created by using other Kling Nodes." 
- - async def get_response( - self, task_id: str, auth_kwargs: dict[str, str], node_id: Optional[str] = None - ) -> KlingVideoExtendResponse: - return await poll_until_finished( - auth_kwargs, - ApiEndpoint( - path=f"{PATH_VIDEO_EXTEND}/{task_id}", - method=HttpMethod.GET, - request_model=EmptyRequest, - response_model=KlingVideoExtendResponse, - ), - result_url_extractor=get_video_url_from_response, - estimated_duration=AVERAGE_DURATION_VIDEO_EXTEND, - node_id=node_id, + ], + outputs=[ + comfy_io.Video.Output(), + comfy_io.String.Output(display_name="video_id"), + comfy_io.String.Output(display_name="duration"), + ], + hidden=[ + comfy_io.Hidden.auth_token_comfy_org, + comfy_io.Hidden.api_key_comfy_org, + comfy_io.Hidden.unique_id, + ], + is_api_node=True, ) - async def api_call( - self, + @classmethod + async def execute( + cls, prompt: str, negative_prompt: str, cfg_scale: float, video_id: str, - unique_id: Optional[str] = None, - **kwargs, - ) -> tuple[VideoFromFile, str, str]: + ) -> comfy_io.NodeOutput: validate_prompts(prompt, negative_prompt, MAX_PROMPT_LENGTH_T2V) + auth = { + "auth_token": cls.hidden.auth_token_comfy_org, + "comfy_api_key": cls.hidden.api_key_comfy_org, + } initial_operation = SynchronousOperation( endpoint=ApiEndpoint( path=PATH_VIDEO_EXTEND, @@ -1009,560 +1178,323 @@ class KlingVideoExtendNode(KlingNodeBase): cfg_scale=cfg_scale, video_id=video_id, ), - auth_kwargs=kwargs, + auth_kwargs=auth, ) task_creation_response = await initial_operation.execute() validate_task_creation_response(task_creation_response) task_id = task_creation_response.data.task_id - final_response = await self.get_response( - task_id, auth_kwargs=kwargs, node_id=unique_id - ) - validate_video_result_response(final_response) - - video = get_video_from_response(final_response) - return await video_result_to_node_output(video) - - -class KlingVideoEffectsBase(KlingNodeBase): - """Kling Video Effects Base""" - - RETURN_TYPES = ("VIDEO", "STRING", "STRING") - RETURN_NAMES = ("VIDEO", "video_id", "duration") - - async def get_response( - self, task_id: str, auth_kwargs: dict[str, str], node_id: Optional[str] = None - ) -> KlingVideoEffectsResponse: - return await poll_until_finished( - auth_kwargs, + final_response = await poll_until_finished( + auth, ApiEndpoint( - path=f"{PATH_VIDEO_EFFECTS}/{task_id}", + path=f"{PATH_VIDEO_EXTEND}/{task_id}", method=HttpMethod.GET, request_model=EmptyRequest, - response_model=KlingVideoEffectsResponse, + response_model=KlingVideoExtendResponse, ), result_url_extractor=get_video_url_from_response, - estimated_duration=AVERAGE_DURATION_VIDEO_EFFECTS, - node_id=node_id, - ) - - async def api_call( - self, - dual_character: bool, - effect_scene: KlingDualCharacterEffectsScene | KlingSingleImageEffectsScene, - model_name: str, - duration: KlingVideoGenDuration, - image_1: torch.Tensor, - image_2: Optional[torch.Tensor] = None, - mode: Optional[KlingVideoGenMode] = None, - unique_id: Optional[str] = None, - **kwargs, - ): - if dual_character: - request_input_field = KlingDualCharacterEffectInput( - model_name=model_name, - mode=mode, - images=[ - tensor_to_base64_string(image_1), - tensor_to_base64_string(image_2), - ], - duration=duration, - ) - else: - request_input_field = KlingSingleImageEffectInput( - model_name=model_name, - image=tensor_to_base64_string(image_1), - duration=duration, - ) - - initial_operation = SynchronousOperation( - endpoint=ApiEndpoint( - path=PATH_VIDEO_EFFECTS, - method=HttpMethod.POST, - request_model=KlingVideoEffectsRequest, - 
response_model=KlingVideoEffectsResponse, - ), - request=KlingVideoEffectsRequest( - effect_scene=effect_scene, - input=request_input_field, - ), - auth_kwargs=kwargs, - ) - - task_creation_response = await initial_operation.execute() - validate_task_creation_response(task_creation_response) - task_id = task_creation_response.data.task_id - - final_response = await self.get_response( - task_id, auth_kwargs=kwargs, node_id=unique_id + estimated_duration=AVERAGE_DURATION_VIDEO_EXTEND, + node_id=cls.hidden.unique_id, ) validate_video_result_response(final_response) video = get_video_from_response(final_response) - return await video_result_to_node_output(video) + return comfy_io.NodeOutput(await download_url_to_video_output(str(video.url)), str(video.id), str(video.duration)) -class KlingDualCharacterVideoEffectNode(KlingVideoEffectsBase): +class KlingDualCharacterVideoEffectNode(comfy_io.ComfyNode): """Kling Dual Character Video Effect Node""" @classmethod - def INPUT_TYPES(s): - return { - "required": { - "image_left": (IO.IMAGE, {"tooltip": "Left side image"}), - "image_right": (IO.IMAGE, {"tooltip": "Right side image"}), - "effect_scene": model_field_to_node_input( - IO.COMBO, - KlingVideoEffectsRequest, + def define_schema(cls) -> comfy_io.Schema: + return comfy_io.Schema( + node_id="KlingDualCharacterVideoEffectNode", + display_name="Kling Dual Character Video Effects", + category="api node/video/Kling", + description="Achieve different special effects when generating a video based on the effect_scene. First image will be positioned on left side, second on right side of the composite.", + inputs=[ + comfy_io.Image.Input("image_left", tooltip="Left side image"), + comfy_io.Image.Input("image_right", tooltip="Right side image"), + comfy_io.Combo.Input( "effect_scene", - enum_type=KlingDualCharacterEffectsScene, + options=[i.value for i in KlingDualCharacterEffectsScene], ), - "model_name": model_field_to_node_input( - IO.COMBO, - KlingDualCharacterEffectInput, + comfy_io.Combo.Input( "model_name", - enum_type=KlingCharacterEffectModelName, + options=[i.value for i in KlingCharacterEffectModelName], + default="kling-v1", ), - "mode": model_field_to_node_input( - IO.COMBO, - KlingDualCharacterEffectInput, + comfy_io.Combo.Input( "mode", - enum_type=KlingVideoGenMode, + options=[i.value for i in KlingVideoGenMode], + default="std", ), - "duration": model_field_to_node_input( - IO.COMBO, - KlingDualCharacterEffectInput, + comfy_io.Combo.Input( "duration", - enum_type=KlingVideoGenDuration, + options=[i.value for i in KlingVideoGenDuration], ), - }, - "hidden": { - "auth_token": "AUTH_TOKEN_COMFY_ORG", - "comfy_api_key": "API_KEY_COMFY_ORG", - "unique_id": "UNIQUE_ID", - }, - } + ], + outputs=[ + comfy_io.Video.Output(), + comfy_io.String.Output(display_name="duration"), + ], + hidden=[ + comfy_io.Hidden.auth_token_comfy_org, + comfy_io.Hidden.api_key_comfy_org, + comfy_io.Hidden.unique_id, + ], + is_api_node=True, + ) - DESCRIPTION = "Achieve different special effects when generating a video based on the effect_scene. First image will be positioned on left side, second on right side of the composite." 
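# NOTE (editor): both effect nodes call the shared `execute_video_effect` helper
# (defined earlier in the file, outside this hunk). Its distinctive branch -- a
# two-image input for dual-character effects versus a single-image input otherwise --
# is reconstructed below from the removed KlingVideoEffectsBase.api_call.
# `build_effect_input` is a hypothetical factoring for illustration only; the real
# helper then runs the same create-task/poll/download flow as execute_image2video and
# returns a (video, video_id, duration) triple, which is why the dual-character node
# above unpacks three values.
def build_effect_input(dual_character, model_name, model_mode, duration, image_1, image_2=None):
    if dual_character:
        return KlingDualCharacterEffectInput(
            model_name=model_name,
            mode=model_mode,
            images=[tensor_to_base64_string(image_1), tensor_to_base64_string(image_2)],
            duration=duration,
        )
    return KlingSingleImageEffectInput(
        model_name=model_name,
        image=tensor_to_base64_string(image_1),
        duration=duration,
    )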
- RETURN_TYPES = ("VIDEO", "STRING") - RETURN_NAMES = ("VIDEO", "duration") - - async def api_call( - self, + @classmethod + async def execute( + cls, image_left: torch.Tensor, image_right: torch.Tensor, effect_scene: KlingDualCharacterEffectsScene, model_name: KlingCharacterEffectModelName, mode: KlingVideoGenMode, duration: KlingVideoGenDuration, - unique_id: Optional[str] = None, - **kwargs, - ): - video, _, duration = await super().api_call( + ) -> comfy_io.NodeOutput: + video, _, duration = await execute_video_effect( + auth_kwargs={ + "auth_token": cls.hidden.auth_token_comfy_org, + "comfy_api_key": cls.hidden.api_key_comfy_org, + }, + node_id=cls.hidden.unique_id, dual_character=True, effect_scene=effect_scene, model_name=model_name, - mode=mode, + model_mode=mode, duration=duration, image_1=image_left, image_2=image_right, - unique_id=unique_id, - **kwargs, ) return video, duration -class KlingSingleImageVideoEffectNode(KlingVideoEffectsBase): +class KlingSingleImageVideoEffectNode(comfy_io.ComfyNode): """Kling Single Image Video Effect Node""" @classmethod - def INPUT_TYPES(s): - return { - "required": { - "image": ( - IO.IMAGE, - { - "tooltip": " Reference Image. URL or Base64 encoded string (without data:image prefix). File size cannot exceed 10MB, resolution not less than 300*300px, aspect ratio between 1:2.5 ~ 2.5:1" - }, - ), - "effect_scene": model_field_to_node_input( - IO.COMBO, - KlingVideoEffectsRequest, + def define_schema(cls) -> comfy_io.Schema: + return comfy_io.Schema( + node_id="KlingSingleImageVideoEffectNode", + display_name="Kling Video Effects", + category="api node/video/Kling", + description="Achieve different special effects when generating a video based on the effect_scene.", + inputs=[ + comfy_io.Image.Input("image", tooltip=" Reference Image. URL or Base64 encoded string (without data:image prefix). File size cannot exceed 10MB, resolution not less than 300*300px, aspect ratio between 1:2.5 ~ 2.5:1"), + comfy_io.Combo.Input( "effect_scene", - enum_type=KlingSingleImageEffectsScene, + options=[i.value for i in KlingSingleImageEffectsScene], ), - "model_name": model_field_to_node_input( - IO.COMBO, - KlingSingleImageEffectInput, + comfy_io.Combo.Input( "model_name", - enum_type=KlingSingleImageEffectModelName, + options=[i.value for i in KlingSingleImageEffectModelName], ), - "duration": model_field_to_node_input( - IO.COMBO, - KlingSingleImageEffectInput, + comfy_io.Combo.Input( "duration", - enum_type=KlingVideoGenDuration, + options=[i.value for i in KlingVideoGenDuration], ), - }, - "hidden": { - "auth_token": "AUTH_TOKEN_COMFY_ORG", - "comfy_api_key": "API_KEY_COMFY_ORG", - "unique_id": "UNIQUE_ID", - }, - } + ], + outputs=[ + comfy_io.Video.Output(), + comfy_io.String.Output(display_name="video_id"), + comfy_io.String.Output(display_name="duration"), + ], + hidden=[ + comfy_io.Hidden.auth_token_comfy_org, + comfy_io.Hidden.api_key_comfy_org, + comfy_io.Hidden.unique_id, + ], + is_api_node=True, + ) - DESCRIPTION = "Achieve different special effects when generating a video based on the effect_scene." 
- - async def api_call( - self, + @classmethod + async def execute( + cls, image: torch.Tensor, effect_scene: KlingSingleImageEffectsScene, model_name: KlingSingleImageEffectModelName, duration: KlingVideoGenDuration, - unique_id: Optional[str] = None, - **kwargs, - ): - return await super().api_call( + ) -> comfy_io.NodeOutput: + return await execute_video_effect( + auth_kwargs={ + "auth_token": cls.hidden.auth_token_comfy_org, + "comfy_api_key": cls.hidden.api_key_comfy_org, + }, + node_id=cls.hidden.unique_id, dual_character=False, effect_scene=effect_scene, model_name=model_name, duration=duration, image_1=image, - unique_id=unique_id, - **kwargs, ) -class KlingLipSyncBase(KlingNodeBase): - """Kling Lip Sync Base""" - - RETURN_TYPES = ("VIDEO", "STRING", "STRING") - RETURN_NAMES = ("VIDEO", "video_id", "duration") - - def validate_lip_sync_video(self, video: VideoInput): - """ - Validates the input video adheres to the expectations of the Kling Lip Sync API: - - Video length does not exceed 10s and is not shorter than 2s - - Length and width dimensions should both be between 720px and 1920px - - See: https://app.klingai.com/global/dev/document-api/apiReference/model/videoTolip - """ - validate_video_dimensions(video, 720, 1920) - validate_video_duration(video, 2, 10) - - def validate_text(self, text: str): - if not text: - raise ValueError("Text is required") - if len(text) > MAX_PROMPT_LENGTH_LIP_SYNC: - raise ValueError( - f"Text is too long. Maximum length is {MAX_PROMPT_LENGTH_LIP_SYNC} characters." - ) - - async def get_response( - self, task_id: str, auth_kwargs: dict[str, str], node_id: Optional[str] = None - ) -> KlingLipSyncResponse: - """Polls the Kling API endpoint until the task reaches a terminal state.""" - return await poll_until_finished( - auth_kwargs, - ApiEndpoint( - path=f"{PATH_LIP_SYNC}/{task_id}", - method=HttpMethod.GET, - request_model=EmptyRequest, - response_model=KlingLipSyncResponse, - ), - result_url_extractor=get_video_url_from_response, - estimated_duration=AVERAGE_DURATION_LIP_SYNC, - node_id=node_id, - ) - - async def api_call( - self, - video: VideoInput, - audio: Optional[AudioInput] = None, - voice_language: Optional[str] = None, - mode: Optional[str] = None, - text: Optional[str] = None, - voice_speed: Optional[float] = None, - voice_id: Optional[str] = None, - unique_id: Optional[str] = None, - **kwargs, - ) -> tuple[VideoFromFile, str, str]: - if text: - self.validate_text(text) - self.validate_lip_sync_video(video) - - # Upload video to Comfy API and get download URL - video_url = await upload_video_to_comfyapi(video, auth_kwargs=kwargs) - logging.info("Uploaded video to Comfy API. URL: %s", video_url) - - # Upload the audio file to Comfy API and get download URL - if audio: - audio_url = await upload_audio_to_comfyapi(audio, auth_kwargs=kwargs) - logging.info("Uploaded audio to Comfy API. 
URL: %s", audio_url) - else: - audio_url = None - - initial_operation = SynchronousOperation( - endpoint=ApiEndpoint( - path=PATH_LIP_SYNC, - method=HttpMethod.POST, - request_model=KlingLipSyncRequest, - response_model=KlingLipSyncResponse, - ), - request=KlingLipSyncRequest( - input=KlingLipSyncInputObject( - video_url=video_url, - mode=mode, - text=text, - voice_language=voice_language, - voice_speed=voice_speed, - audio_type="url", - audio_url=audio_url, - voice_id=voice_id, - ), - ), - auth_kwargs=kwargs, - ) - - task_creation_response = await initial_operation.execute() - validate_task_creation_response(task_creation_response) - task_id = task_creation_response.data.task_id - - final_response = await self.get_response( - task_id, auth_kwargs=kwargs, node_id=unique_id - ) - validate_video_result_response(final_response) - - video = get_video_from_response(final_response) - return await video_result_to_node_output(video) - - -class KlingLipSyncAudioToVideoNode(KlingLipSyncBase): +class KlingLipSyncAudioToVideoNode(comfy_io.ComfyNode): """Kling Lip Sync Audio to Video Node. Syncs mouth movements in a video file to the audio content of an audio file.""" @classmethod - def INPUT_TYPES(s): - return { - "required": { - "video": (IO.VIDEO, {}), - "audio": (IO.AUDIO, {}), - "voice_language": model_field_to_node_input( - IO.COMBO, - KlingLipSyncInputObject, + def define_schema(cls) -> comfy_io.Schema: + return comfy_io.Schema( + node_id="KlingLipSyncAudioToVideoNode", + display_name="Kling Lip Sync Video with Audio", + category="api node/video/Kling", + description="Kling Lip Sync Audio to Video Node. Syncs mouth movements in a video file to the audio content of an audio file. When using, ensure that the audio contains clearly distinguishable vocals and that the video contains a distinct face. The audio file should not be larger than 5MB. The video file should not be larger than 100MB, should have height/width between 720px and 1920px, and should be between 2s and 10s in length.", + inputs=[ + comfy_io.Video.Input("video"), + comfy_io.Audio.Input("audio"), + comfy_io.Combo.Input( "voice_language", - enum_type=KlingLipSyncVoiceLanguage, + options=[i.value for i in KlingLipSyncVoiceLanguage], + default="en", ), - }, - "hidden": { - "auth_token": "AUTH_TOKEN_COMFY_ORG", - "comfy_api_key": "API_KEY_COMFY_ORG", - "unique_id": "UNIQUE_ID", - }, - } + ], + outputs=[ + comfy_io.Video.Output(), + comfy_io.String.Output(display_name="video_id"), + comfy_io.String.Output(display_name="duration"), + ], + hidden=[ + comfy_io.Hidden.auth_token_comfy_org, + comfy_io.Hidden.api_key_comfy_org, + comfy_io.Hidden.unique_id, + ], + is_api_node=True, + ) - DESCRIPTION = "Kling Lip Sync Audio to Video Node. Syncs mouth movements in a video file to the audio content of an audio file. When using, ensure that the audio contains clearly distinguishable vocals and that the video contains a distinct face. The audio file should not be larger than 5MB. The video file should not be larger than 100MB, should have height/width between 720px and 1920px, and should be between 2s and 10s in length." 
- - async def api_call( - self, + @classmethod + async def execute( + cls, video: VideoInput, audio: AudioInput, voice_language: str, - unique_id: Optional[str] = None, - **kwargs, - ): - return await super().api_call( + ) -> comfy_io.NodeOutput: + return await execute_lipsync( + auth_kwargs={ + "auth_token": cls.hidden.auth_token_comfy_org, + "comfy_api_key": cls.hidden.api_key_comfy_org, + }, + node_id=cls.hidden.unique_id, video=video, audio=audio, voice_language=voice_language, - mode="audio2video", - unique_id=unique_id, - **kwargs, + model_mode="audio2video", ) -class KlingLipSyncTextToVideoNode(KlingLipSyncBase): +class KlingLipSyncTextToVideoNode(comfy_io.ComfyNode): """Kling Lip Sync Text to Video Node. Syncs mouth movements in a video file to a text prompt.""" - @staticmethod - def get_voice_config() -> dict[str, tuple[str, str]]: - return { - # English voices - "Melody": ("girlfriend_4_speech02", "en"), - "Sunny": ("genshin_vindi2", "en"), - "Sage": ("zhinen_xuesheng", "en"), - "Ace": ("AOT", "en"), - "Blossom": ("ai_shatang", "en"), - "Peppy": ("genshin_klee2", "en"), - "Dove": ("genshin_kirara", "en"), - "Shine": ("ai_kaiya", "en"), - "Anchor": ("oversea_male1", "en"), - "Lyric": ("ai_chenjiahao_712", "en"), - "Tender": ("chat1_female_new-3", "en"), - "Siren": ("chat_0407_5-1", "en"), - "Zippy": ("cartoon-boy-07", "en"), - "Bud": ("uk_boy1", "en"), - "Sprite": ("cartoon-girl-01", "en"), - "Candy": ("PeppaPig_platform", "en"), - "Beacon": ("ai_huangzhong_712", "en"), - "Rock": ("ai_huangyaoshi_712", "en"), - "Titan": ("ai_laoguowang_712", "en"), - "Grace": ("chengshu_jiejie", "en"), - "Helen": ("you_pingjing", "en"), - "Lore": ("calm_story1", "en"), - "Crag": ("uk_man2", "en"), - "Prattle": ("laopopo_speech02", "en"), - "Hearth": ("heainainai_speech02", "en"), - "The Reader": ("reader_en_m-v1", "en"), - "Commercial Lady": ("commercial_lady_en_f-v1", "en"), - # Chinese voices - "阳光少年": ("genshin_vindi2", "zh"), - "懂事小弟": ("zhinen_xuesheng", "zh"), - "运动少年": ("tiyuxi_xuedi", "zh"), - "青春少女": ("ai_shatang", "zh"), - "温柔小妹": ("genshin_klee2", "zh"), - "元气少女": ("genshin_kirara", "zh"), - "阳光男生": ("ai_kaiya", "zh"), - "幽默小哥": ("tiexin_nanyou", "zh"), - "文艺小哥": ("ai_chenjiahao_712", "zh"), - "甜美邻家": ("girlfriend_1_speech02", "zh"), - "温柔姐姐": ("chat1_female_new-3", "zh"), - "职场女青": ("girlfriend_2_speech02", "zh"), - "活泼男童": ("cartoon-boy-07", "zh"), - "俏皮女童": ("cartoon-girl-01", "zh"), - "稳重老爸": ("ai_huangyaoshi_712", "zh"), - "温柔妈妈": ("you_pingjing", "zh"), - "严肃上司": ("ai_laoguowang_712", "zh"), - "优雅贵妇": ("chengshu_jiejie", "zh"), - "慈祥爷爷": ("zhuxi_speech02", "zh"), - "唠叨爷爷": ("uk_oldman3", "zh"), - "唠叨奶奶": ("laopopo_speech02", "zh"), - "和蔼奶奶": ("heainainai_speech02", "zh"), - "东北老铁": ("dongbeilaotie_speech02", "zh"), - "重庆小伙": ("chongqingxiaohuo_speech02", "zh"), - "四川妹子": ("chuanmeizi_speech02", "zh"), - "潮汕大叔": ("chaoshandashu_speech02", "zh"), - "台湾男生": ("ai_taiwan_man2_speech02", "zh"), - "西安掌柜": ("xianzhanggui_speech02", "zh"), - "天津姐姐": ("tianjinjiejie_speech02", "zh"), - "新闻播报男": ("diyinnansang_DB_CN_M_04-v2", "zh"), - "译制片男": ("yizhipiannan-v1", "zh"), - "撒娇女友": ("tianmeixuemei-v1", "zh"), - "刀片烟嗓": ("daopianyansang-v1", "zh"), - "乖巧正太": ("mengwa-v1", "zh"), - } + @classmethod + def define_schema(cls) -> comfy_io.Schema: + return comfy_io.Schema( + node_id="KlingLipSyncTextToVideoNode", + display_name="Kling Lip Sync Video with Text", + category="api node/video/Kling", + description="Kling Lip Sync Text to Video Node. Syncs mouth movements in a video file to a text prompt. 
The video file should not be larger than 100MB, should have height/width between 720px and 1920px, and should be between 2s and 10s in length.", + inputs=[ + comfy_io.Video.Input("video"), + comfy_io.String.Input( + "text", + multiline=True, + tooltip="Text Content for Lip-Sync Video Generation. Required when mode is text2video. Maximum length is 120 characters.", + ), + comfy_io.Combo.Input( + "voice", + options=list(VOICES_CONFIG.keys()), + default="Melody", + ), + comfy_io.Float.Input( + "voice_speed", + default=1, + min=0.8, + max=2.0, + display_mode=comfy_io.NumberDisplay.slider, + tooltip="Speech Rate. Valid range: 0.8~2.0, accurate to one decimal place.", + ), + ], + outputs=[ + comfy_io.Video.Output(), + comfy_io.String.Output(display_name="video_id"), + comfy_io.String.Output(display_name="duration"), + ], + hidden=[ + comfy_io.Hidden.auth_token_comfy_org, + comfy_io.Hidden.api_key_comfy_org, + comfy_io.Hidden.unique_id, + ], + is_api_node=True, + ) @classmethod - def INPUT_TYPES(s): - voice_options = list(s.get_voice_config().keys()) - return { - "required": { - "video": (IO.VIDEO, {}), - "text": model_field_to_node_input( - IO.STRING, KlingLipSyncInputObject, "text", multiline=True - ), - "voice": (voice_options, {"default": voice_options[0]}), - "voice_speed": model_field_to_node_input( - IO.FLOAT, KlingLipSyncInputObject, "voice_speed", slider=True - ), - }, - "hidden": { - "auth_token": "AUTH_TOKEN_COMFY_ORG", - "comfy_api_key": "API_KEY_COMFY_ORG", - "unique_id": "UNIQUE_ID", - }, - } - - DESCRIPTION = "Kling Lip Sync Text to Video Node. Syncs mouth movements in a video file to a text prompt. The video file should not be larger than 100MB, should have height/width between 720px and 1920px, and should be between 2s and 10s in length." - - async def api_call( - self, + async def execute( + cls, video: VideoInput, text: str, voice: str, voice_speed: float, - unique_id: Optional[str] = None, - **kwargs, - ): - voice_id, voice_language = KlingLipSyncTextToVideoNode.get_voice_config()[voice] - return await super().api_call( + ) -> comfy_io.NodeOutput: + voice_id, voice_language = VOICES_CONFIG[voice] + return await execute_lipsync( + auth_kwargs={ + "auth_token": cls.hidden.auth_token_comfy_org, + "comfy_api_key": cls.hidden.api_key_comfy_org, + }, + node_id=cls.hidden.unique_id, video=video, text=text, voice_language=voice_language, voice_id=voice_id, voice_speed=voice_speed, - mode="text2video", - unique_id=unique_id, - **kwargs, + model_mode="text2video", ) -class KlingImageGenerationBase(KlingNodeBase): - """Kling Image Generation Base Node.""" - - RETURN_TYPES = ("IMAGE",) - CATEGORY = "api node/image/Kling" - - def validate_prompt(self, prompt: str, negative_prompt: Optional[str] = None): - if not prompt or len(prompt) > MAX_PROMPT_LENGTH_IMAGE_GEN: - raise ValueError( - f"Prompt must be less than {MAX_PROMPT_LENGTH_IMAGE_GEN} characters" - ) - if negative_prompt and len(negative_prompt) > MAX_PROMPT_LENGTH_IMAGE_GEN: - raise ValueError( - f"Negative prompt must be less than {MAX_PROMPT_LENGTH_IMAGE_GEN} characters" - ) - - -class KlingVirtualTryOnNode(KlingImageGenerationBase): +class KlingVirtualTryOnNode(comfy_io.ComfyNode): """Kling Virtual Try On Node.""" @classmethod - def INPUT_TYPES(s): - return { - "required": { - "human_image": (IO.IMAGE, {}), - "cloth_image": (IO.IMAGE, {}), - "model_name": model_field_to_node_input( - IO.COMBO, - KlingVirtualTryOnRequest, + def define_schema(cls) -> comfy_io.Schema: + return comfy_io.Schema( + node_id="KlingVirtualTryOnNode", + 
display_name="Kling Virtual Try On", + category="api node/image/Kling", + description="Kling Virtual Try On Node. Input a human image and a cloth image to try on the cloth on the human. You can merge multiple clothing item pictures into one image with a white background.", + inputs=[ + comfy_io.Image.Input("human_image"), + comfy_io.Image.Input("cloth_image"), + comfy_io.Combo.Input( "model_name", - enum_type=KlingVirtualTryOnModelName, + options=[i.value for i in KlingVirtualTryOnModelName], + default="kolors-virtual-try-on-v1", ), - }, - "hidden": { - "auth_token": "AUTH_TOKEN_COMFY_ORG", - "comfy_api_key": "API_KEY_COMFY_ORG", - "unique_id": "UNIQUE_ID", - }, - } - - DESCRIPTION = "Kling Virtual Try On Node. Input a human image and a cloth image to try on the cloth on the human. You can merge multiple clothing item pictures into one image with a white background." - - async def get_response( - self, task_id: str, auth_kwargs: dict[str, str], node_id: Optional[str] = None - ) -> KlingVirtualTryOnResponse: - return await poll_until_finished( - auth_kwargs, - ApiEndpoint( - path=f"{PATH_VIRTUAL_TRY_ON}/{task_id}", - method=HttpMethod.GET, - request_model=EmptyRequest, - response_model=KlingVirtualTryOnResponse, - ), - result_url_extractor=get_images_urls_from_response, - estimated_duration=AVERAGE_DURATION_VIRTUAL_TRY_ON, - node_id=node_id, + ], + outputs=[ + comfy_io.Image.Output(), + ], + hidden=[ + comfy_io.Hidden.auth_token_comfy_org, + comfy_io.Hidden.api_key_comfy_org, + comfy_io.Hidden.unique_id, + ], + is_api_node=True, ) - async def api_call( - self, + @classmethod + async def execute( + cls, human_image: torch.Tensor, cloth_image: torch.Tensor, model_name: KlingVirtualTryOnModelName, - unique_id: Optional[str] = None, - **kwargs, - ): + ) -> comfy_io.NodeOutput: + auth = { + "auth_token": cls.hidden.auth_token_comfy_org, + "comfy_api_key": cls.hidden.api_key_comfy_org, + } initial_operation = SynchronousOperation( endpoint=ApiEndpoint( path=PATH_VIRTUAL_TRY_ON, @@ -1575,113 +1507,99 @@ class KlingVirtualTryOnNode(KlingImageGenerationBase): cloth_image=tensor_to_base64_string(cloth_image), model_name=model_name, ), - auth_kwargs=kwargs, + auth_kwargs=auth, ) task_creation_response = await initial_operation.execute() validate_task_creation_response(task_creation_response) task_id = task_creation_response.data.task_id - final_response = await self.get_response( - task_id, auth_kwargs=kwargs, node_id=unique_id + final_response = await poll_until_finished( + auth, + ApiEndpoint( + path=f"{PATH_VIRTUAL_TRY_ON}/{task_id}", + method=HttpMethod.GET, + request_model=EmptyRequest, + response_model=KlingVirtualTryOnResponse, + ), + result_url_extractor=get_images_urls_from_response, + estimated_duration=AVERAGE_DURATION_VIRTUAL_TRY_ON, + node_id=cls.hidden.unique_id, ) validate_image_result_response(final_response) images = get_images_from_response(final_response) - return (await image_result_to_node_output(images),) + return comfy_io.NodeOutput(await image_result_to_node_output(images)) -class KlingImageGenerationNode(KlingImageGenerationBase): +class KlingImageGenerationNode(comfy_io.ComfyNode): """Kling Image Generation Node. 
Generate an image from a text prompt with an optional reference image.""" @classmethod - def INPUT_TYPES(s): - return { - "required": { - "prompt": model_field_to_node_input( - IO.STRING, - KlingImageGenerationsRequest, - "prompt", - multiline=True, - max_length=MAX_PROMPT_LENGTH_IMAGE_GEN, + def define_schema(cls) -> comfy_io.Schema: + return comfy_io.Schema( + node_id="KlingImageGenerationNode", + display_name="Kling Image Generation", + category="api node/image/Kling", + description="Kling Image Generation Node. Generate an image from a text prompt with an optional reference image.", + inputs=[ + comfy_io.String.Input("prompt", multiline=True, tooltip="Positive text prompt"), + comfy_io.String.Input("negative_prompt", multiline=True, tooltip="Negative text prompt"), + comfy_io.Combo.Input( + "image_type", + options=[i.value for i in KlingImageGenImageReferenceType], ), - "negative_prompt": model_field_to_node_input( - IO.STRING, - KlingImageGenerationsRequest, - "negative_prompt", - multiline=True, - ), - "image_type": model_field_to_node_input( - IO.COMBO, - KlingImageGenerationsRequest, - "image_reference", - enum_type=KlingImageGenImageReferenceType, - ), - "image_fidelity": model_field_to_node_input( - IO.FLOAT, - KlingImageGenerationsRequest, + comfy_io.Float.Input( "image_fidelity", - slider=True, + default=0.5, + min=0.0, + max=1.0, step=0.01, + display_mode=comfy_io.NumberDisplay.slider, + tooltip="Reference intensity for user-uploaded images", ), - "human_fidelity": model_field_to_node_input( - IO.FLOAT, - KlingImageGenerationsRequest, + comfy_io.Float.Input( "human_fidelity", - slider=True, + default=0.45, + min=0.0, + max=1.0, step=0.01, + display_mode=comfy_io.NumberDisplay.slider, + tooltip="Subject reference similarity", ), - "model_name": model_field_to_node_input( - IO.COMBO, - KlingImageGenerationsRequest, + comfy_io.Combo.Input( "model_name", - enum_type=KlingImageGenModelName, + options=[i.value for i in KlingImageGenModelName], + default="kling-v1", ), - "aspect_ratio": model_field_to_node_input( - IO.COMBO, - KlingImageGenerationsRequest, + comfy_io.Combo.Input( "aspect_ratio", - enum_type=KlingImageGenAspectRatio, + options=[i.value for i in KlingImageGenAspectRatio], + default="16:9", ), - "n": model_field_to_node_input( - IO.INT, - KlingImageGenerationsRequest, + comfy_io.Int.Input( "n", + default=1, + min=1, + max=9, + tooltip="Number of generated images", ), - }, - "optional": { - "image": (IO.IMAGE, {}), - }, - "hidden": { - "auth_token": "AUTH_TOKEN_COMFY_ORG", - "comfy_api_key": "API_KEY_COMFY_ORG", - "unique_id": "UNIQUE_ID", - }, - } - - DESCRIPTION = "Kling Image Generation Node. Generate an image from a text prompt with an optional reference image." 
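# NOTE (editor): the removed KlingImageGenerationBase.validate_prompt is replaced by
# the shared `validate_string` helper imported elsewhere in the module. A rough
# behavioral equivalent of the two calls in execute below, for review context only
# (hypothetical; this is not the actual helper's implementation or signature):
def check_image_gen_prompts(prompt: str, negative_prompt: str = "") -> None:
    if not prompt or len(prompt) > MAX_PROMPT_LENGTH_IMAGE_GEN:
        raise ValueError(f"Prompt must be 1 to {MAX_PROMPT_LENGTH_IMAGE_GEN} characters")
    if negative_prompt and len(negative_prompt) > MAX_PROMPT_LENGTH_IMAGE_GEN:
        raise ValueError(f"Negative prompt must be at most {MAX_PROMPT_LENGTH_IMAGE_GEN} characters")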
- - async def get_response( - self, - task_id: str, - auth_kwargs: Optional[dict[str, str]], - node_id: Optional[str] = None, - ) -> KlingImageGenerationsResponse: - return await poll_until_finished( - auth_kwargs, - ApiEndpoint( - path=f"{PATH_IMAGE_GENERATIONS}/{task_id}", - method=HttpMethod.GET, - request_model=EmptyRequest, - response_model=KlingImageGenerationsResponse, - ), - result_url_extractor=get_images_urls_from_response, - estimated_duration=AVERAGE_DURATION_IMAGE_GEN, - node_id=node_id, + comfy_io.Image.Input("image", optional=True), + ], + outputs=[ + comfy_io.Image.Output(), + ], + hidden=[ + comfy_io.Hidden.auth_token_comfy_org, + comfy_io.Hidden.api_key_comfy_org, + comfy_io.Hidden.unique_id, + ], + is_api_node=True, ) - async def api_call( - self, + @classmethod + async def execute( + cls, model_name: KlingImageGenModelName, prompt: str, negative_prompt: str, @@ -1691,10 +1609,9 @@ class KlingImageGenerationNode(KlingImageGenerationBase): n: int, aspect_ratio: KlingImageGenAspectRatio, image: Optional[torch.Tensor] = None, - unique_id: Optional[str] = None, - **kwargs, - ): - self.validate_prompt(prompt, negative_prompt) + ) -> comfy_io.NodeOutput: + validate_string(prompt, field_name="prompt", min_length=1, max_length=MAX_PROMPT_LENGTH_IMAGE_GEN) + validate_string(negative_prompt, field_name="negative_prompt", max_length=MAX_PROMPT_LENGTH_IMAGE_GEN) if image is None: image_type = None @@ -1703,6 +1620,10 @@ class KlingImageGenerationNode(KlingImageGenerationBase): else: image = tensor_to_base64_string(image) + auth = { + "auth_token": cls.hidden.auth_token_comfy_org, + "comfy_api_key": cls.hidden.api_key_comfy_org, + } initial_operation = SynchronousOperation( endpoint=ApiEndpoint( path=PATH_IMAGE_GENERATIONS, @@ -1721,50 +1642,50 @@ class KlingImageGenerationNode(KlingImageGenerationBase): n=n, aspect_ratio=aspect_ratio, ), - auth_kwargs=kwargs, + auth_kwargs=auth, ) task_creation_response = await initial_operation.execute() validate_task_creation_response(task_creation_response) task_id = task_creation_response.data.task_id - final_response = await self.get_response( - task_id, auth_kwargs=kwargs, node_id=unique_id + final_response = await poll_until_finished( + auth, + ApiEndpoint( + path=f"{PATH_IMAGE_GENERATIONS}/{task_id}", + method=HttpMethod.GET, + request_model=EmptyRequest, + response_model=KlingImageGenerationsResponse, + ), + result_url_extractor=get_images_urls_from_response, + estimated_duration=AVERAGE_DURATION_IMAGE_GEN, + node_id=cls.hidden.unique_id, ) validate_image_result_response(final_response) images = get_images_from_response(final_response) - return (await image_result_to_node_output(images),) + return comfy_io.NodeOutput(await image_result_to_node_output(images)) -NODE_CLASS_MAPPINGS = { - "KlingCameraControls": KlingCameraControls, - "KlingTextToVideoNode": KlingTextToVideoNode, - "KlingImage2VideoNode": KlingImage2VideoNode, - "KlingCameraControlI2VNode": KlingCameraControlI2VNode, - "KlingCameraControlT2VNode": KlingCameraControlT2VNode, - "KlingStartEndFrameNode": KlingStartEndFrameNode, - "KlingVideoExtendNode": KlingVideoExtendNode, - "KlingLipSyncAudioToVideoNode": KlingLipSyncAudioToVideoNode, - "KlingLipSyncTextToVideoNode": KlingLipSyncTextToVideoNode, - "KlingVirtualTryOnNode": KlingVirtualTryOnNode, - "KlingImageGenerationNode": KlingImageGenerationNode, - "KlingSingleImageVideoEffectNode": KlingSingleImageVideoEffectNode, - "KlingDualCharacterVideoEffectNode": KlingDualCharacterVideoEffectNode, -} +class 
KlingExtension(ComfyExtension): + @override + async def get_node_list(self) -> list[type[comfy_io.ComfyNode]]: + return [ + KlingCameraControls, + KlingTextToVideoNode, + KlingImage2VideoNode, + KlingCameraControlI2VNode, + KlingCameraControlT2VNode, + KlingStartEndFrameNode, + KlingVideoExtendNode, + KlingLipSyncAudioToVideoNode, + KlingLipSyncTextToVideoNode, + KlingVirtualTryOnNode, + KlingImageGenerationNode, + KlingSingleImageVideoEffectNode, + KlingDualCharacterVideoEffectNode, + ] -NODE_DISPLAY_NAME_MAPPINGS = { - "KlingCameraControls": "Kling Camera Controls", - "KlingTextToVideoNode": "Kling Text to Video", - "KlingImage2VideoNode": "Kling Image to Video", - "KlingCameraControlI2VNode": "Kling Image to Video (Camera Control)", - "KlingCameraControlT2VNode": "Kling Text to Video (Camera Control)", - "KlingStartEndFrameNode": "Kling Start-End Frame to Video", - "KlingVideoExtendNode": "Kling Video Extend", - "KlingLipSyncAudioToVideoNode": "Kling Lip Sync Video with Audio", - "KlingLipSyncTextToVideoNode": "Kling Lip Sync Video with Text", - "KlingVirtualTryOnNode": "Kling Virtual Try On", - "KlingImageGenerationNode": "Kling Image Generation", - "KlingSingleImageVideoEffectNode": "Kling Video Effects", - "KlingDualCharacterVideoEffectNode": "Kling Dual Character Video Effects", -} + +async def comfy_entrypoint() -> KlingExtension: + return KlingExtension() diff --git a/comfy_api_nodes/nodes_moonvalley.py b/comfy_api_nodes/nodes_moonvalley.py index 6467dd614..55471a69d 100644 --- a/comfy_api_nodes/nodes_moonvalley.py +++ b/comfy_api_nodes/nodes_moonvalley.py @@ -473,7 +473,7 @@ class MoonvalleyImg2VideoNode(comfy_io.ComfyNode): height=width_height["height"], use_negative_prompts=True, ) - """Upload image to comfy backend to have a URL available for further processing""" + # Get MIME type from tensor - assuming PNG format for image tensors mime_type = "image/png" @@ -591,7 +591,6 @@ class MoonvalleyVideo2VideoNode(comfy_io.ComfyNode): validated_video = validate_video_to_video_input(video) video_url = await upload_video_to_comfyapi(validated_video, auth_kwargs=auth) - """Validate prompts and inference input""" validate_prompts(prompt, negative_prompt) # Only include motion_intensity for Motion Transfer diff --git a/comfy_api_nodes/nodes_pika.py b/comfy_api_nodes/nodes_pika.py index a8dc43cb3..0a9f04cc2 100644 --- a/comfy_api_nodes/nodes_pika.py +++ b/comfy_api_nodes/nodes_pika.py @@ -5,14 +5,16 @@ Pika API docs: https://pika-827374fb.mintlify.app/api-reference """ from __future__ import annotations -import io +from io import BytesIO import logging from typing import Optional, TypeVar +from enum import Enum import numpy as np import torch -from comfy.comfy_types.node_typing import IO, ComfyNodeABC, InputTypeOptions +from typing_extensions import override +from comfy_api.latest import ComfyExtension, io as comfy_io from comfy_api.input_impl import VideoFromFile from comfy_api.input_impl.video_types import VideoCodec, VideoContainer, VideoInput from comfy_api_nodes.apinode_utils import ( @@ -20,7 +22,6 @@ from comfy_api_nodes.apinode_utils import ( tensor_to_bytesio, ) from comfy_api_nodes.apis import ( - IngredientsMode, PikaBodyGenerate22C2vGenerate22PikascenesPost, PikaBodyGenerate22I2vGenerate22I2vPost, PikaBodyGenerate22KeyframeGenerate22PikaframesPost, @@ -28,10 +29,7 @@ from comfy_api_nodes.apis import ( PikaBodyGeneratePikadditionsGeneratePikadditionsPost, PikaBodyGeneratePikaffectsGeneratePikaffectsPost, PikaBodyGeneratePikaswapsGeneratePikaswapsPost, - PikaDurationEnum, - 
Pikaffect, PikaGenerateResponse, - PikaResolutionEnum, PikaVideoResponse, ) from comfy_api_nodes.apis.client import ( @@ -41,7 +39,6 @@ from comfy_api_nodes.apis.client import ( PollingOperation, SynchronousOperation, ) -from comfy_api_nodes.mapper_utils import model_field_to_node_input R = TypeVar("R") @@ -58,6 +55,35 @@ PATH_PIKASCENES = f"/proxy/pika/generate/{PIKA_API_VERSION}/pikascenes" PATH_VIDEO_GET = "/proxy/pika/videos" +class PikaDurationEnum(int, Enum): + integer_5 = 5 + integer_10 = 10 + + +class PikaResolutionEnum(str, Enum): + field_1080p = "1080p" + field_720p = "720p" + + +class Pikaffect(str, Enum): + Cake_ify = "Cake-ify" + Crumble = "Crumble" + Crush = "Crush" + Decapitate = "Decapitate" + Deflate = "Deflate" + Dissolve = "Dissolve" + Explode = "Explode" + Eye_pop = "Eye-pop" + Inflate = "Inflate" + Levitate = "Levitate" + Melt = "Melt" + Peel = "Peel" + Poke = "Poke" + Squish = "Squish" + Ta_da = "Ta-da" + Tear = "Tear" + + class PikaApiError(Exception): """Exception for Pika API errors.""" @@ -74,155 +100,121 @@ def is_valid_initial_response(response: PikaGenerateResponse) -> bool: return hasattr(response, "video_id") and response.video_id is not None -class PikaNodeBase(ComfyNodeABC): - """Base class for Pika nodes.""" +async def poll_for_task_status( + task_id: str, + auth_kwargs: Optional[dict[str, str]] = None, + node_id: Optional[str] = None, +) -> PikaGenerateResponse: + polling_operation = PollingOperation( + poll_endpoint=ApiEndpoint( + path=f"{PATH_VIDEO_GET}/{task_id}", + method=HttpMethod.GET, + request_model=EmptyRequest, + response_model=PikaVideoResponse, + ), + completed_statuses=[ + "finished", + ], + failed_statuses=["failed", "cancelled"], + status_extractor=lambda response: ( + response.status.value if response.status else None + ), + progress_extractor=lambda response: ( + response.progress if hasattr(response, "progress") else None + ), + auth_kwargs=auth_kwargs, + result_url_extractor=lambda response: ( + response.url if hasattr(response, "url") else None + ), + node_id=node_id, + estimated_duration=60 + ) + return await polling_operation.execute() - @classmethod - def get_base_inputs_types( - cls, request_model - ) -> dict[str, tuple[IO, InputTypeOptions]]: - """Get the base required inputs types common to all Pika nodes.""" - return { - "prompt_text": model_field_to_node_input( - IO.STRING, - request_model, - "promptText", - multiline=True, - ), - "negative_prompt": model_field_to_node_input( - IO.STRING, - request_model, - "negativePrompt", - multiline=True, - ), - "seed": model_field_to_node_input( - IO.INT, - request_model, - "seed", - min=0, - max=0xFFFFFFFF, - control_after_generate=True, - ), - "resolution": model_field_to_node_input( - IO.COMBO, - request_model, - "resolution", - enum_type=PikaResolutionEnum, - ), - "duration": model_field_to_node_input( - IO.COMBO, - request_model, - "duration", - enum_type=PikaDurationEnum, - ), - } - CATEGORY = "api node/video/Pika" - API_NODE = True - FUNCTION = "api_call" - RETURN_TYPES = ("VIDEO",) +async def execute_task( + initial_operation: SynchronousOperation[R, PikaGenerateResponse], + auth_kwargs: Optional[dict[str, str]] = None, + node_id: Optional[str] = None, +) -> tuple[VideoFromFile]: + """Executes the initial operation then polls for the task status until it is completed. 
- async def poll_for_task_status( - self, - task_id: str, - auth_kwargs: Optional[dict[str, str]] = None, - node_id: Optional[str] = None, - ) -> PikaGenerateResponse: - polling_operation = PollingOperation( - poll_endpoint=ApiEndpoint( - path=f"{PATH_VIDEO_GET}/{task_id}", - method=HttpMethod.GET, - request_model=EmptyRequest, - response_model=PikaVideoResponse, - ), - completed_statuses=[ - "finished", - ], - failed_statuses=["failed", "cancelled"], - status_extractor=lambda response: ( - response.status.value if response.status else None - ), - progress_extractor=lambda response: ( - response.progress if hasattr(response, "progress") else None - ), - auth_kwargs=auth_kwargs, - result_url_extractor=lambda response: ( - response.url if hasattr(response, "url") else None - ), - node_id=node_id, - estimated_duration=60 + Args: + initial_operation: The initial operation to execute. + auth_kwargs: The authentication token(s) to use for the API call. + + Returns: + A tuple containing the video file as a VIDEO output. + """ + initial_response = await initial_operation.execute() + if not is_valid_initial_response(initial_response): + error_msg = f"Pika initial request failed. Code: {initial_response.code}, Message: {initial_response.message}, Data: {initial_response.data}" + logging.error(error_msg) + raise PikaApiError(error_msg) + + task_id = initial_response.video_id + final_response = await poll_for_task_status(task_id, auth_kwargs, node_id=node_id) + if not is_valid_video_response(final_response): + error_msg = ( + f"Pika task {task_id} succeeded but no video data found in response." ) - return await polling_operation.execute() + logging.error(error_msg) + raise PikaApiError(error_msg) - async def execute_task( - self, - initial_operation: SynchronousOperation[R, PikaGenerateResponse], - auth_kwargs: Optional[dict[str, str]] = None, - node_id: Optional[str] = None, - ) -> tuple[VideoFromFile]: - """Executes the initial operation then polls for the task status until it is completed. + video_url = str(final_response.url) + logging.info("Pika task %s succeeded. Video URL: %s", task_id, video_url) - Args: - initial_operation: The initial operation to execute. - auth_kwargs: The authentication token(s) to use for the API call. - - Returns: - A tuple containing the video file as a VIDEO output. - """ - initial_response = await initial_operation.execute() - if not is_valid_initial_response(initial_response): - error_msg = f"Pika initial request failed. Code: {initial_response.code}, Message: {initial_response.message}, Data: {initial_response.data}" - logging.error(error_msg) - raise PikaApiError(error_msg) - - task_id = initial_response.video_id - final_response = await self.poll_for_task_status(task_id, auth_kwargs) - if not is_valid_video_response(final_response): - error_msg = ( - f"Pika task {task_id} succeeded but no video data found in response." - ) - logging.error(error_msg) - raise PikaApiError(error_msg) - - video_url = str(final_response.url) - logging.info("Pika task %s succeeded. 
Video URL: %s", task_id, video_url) - - return (await download_url_to_video_output(video_url),) + return (await download_url_to_video_output(video_url),) -class PikaImageToVideoV2_2(PikaNodeBase): +def get_base_inputs_types() -> list[comfy_io.Input]: + """Get the base required inputs types common to all Pika nodes.""" + return [ + comfy_io.String.Input("prompt_text", multiline=True), + comfy_io.String.Input("negative_prompt", multiline=True), + comfy_io.Int.Input("seed", min=0, max=0xFFFFFFFF, control_after_generate=True), + comfy_io.Combo.Input( + "resolution", options=[resolution.value for resolution in PikaResolutionEnum], default="1080p" + ), + comfy_io.Combo.Input( + "duration", options=[duration.value for duration in PikaDurationEnum], default=5 + ), + ] + + +class PikaImageToVideoV2_2(comfy_io.ComfyNode): """Pika 2.2 Image to Video Node.""" @classmethod - def INPUT_TYPES(cls): - return { - "required": { - "image": ( - IO.IMAGE, - {"tooltip": "The image to convert to video"}, - ), - **cls.get_base_inputs_types(PikaBodyGenerate22I2vGenerate22I2vPost), - }, - "hidden": { - "auth_token": "AUTH_TOKEN_COMFY_ORG", - "comfy_api_key": "API_KEY_COMFY_ORG", - "unique_id": "UNIQUE_ID", - }, - } + def define_schema(cls) -> comfy_io.Schema: + return comfy_io.Schema( + node_id="PikaImageToVideoNode2_2", + display_name="Pika Image to Video", + description="Sends an image and prompt to the Pika API v2.2 to generate a video.", + category="api node/video/Pika", + inputs=[ + comfy_io.Image.Input("image", tooltip="The image to convert to video"), + *get_base_inputs_types(), + ], + outputs=[comfy_io.Video.Output()], + hidden=[ + comfy_io.Hidden.auth_token_comfy_org, + comfy_io.Hidden.api_key_comfy_org, + comfy_io.Hidden.unique_id, + ], + is_api_node=True, + ) - DESCRIPTION = "Sends an image and prompt to the Pika API v2.2 to generate a video." 
- - async def api_call( - self, + @classmethod + async def execute( + cls, image: torch.Tensor, prompt_text: str, negative_prompt: str, seed: int, resolution: str, duration: int, - unique_id: str, - **kwargs, - ) -> tuple[VideoFromFile]: + ) -> comfy_io.NodeOutput: # Convert image to BytesIO image_bytes_io = tensor_to_bytesio(image) image_bytes_io.seek(0) @@ -237,7 +229,10 @@ class PikaImageToVideoV2_2(PikaNodeBase): resolution=resolution, duration=duration, ) - + auth = { + "auth_token": cls.hidden.auth_token_comfy_org, + "comfy_api_key": cls.hidden.api_key_comfy_org, + } initial_operation = SynchronousOperation( endpoint=ApiEndpoint( path=PATH_IMAGE_TO_VIDEO, @@ -248,50 +243,55 @@ class PikaImageToVideoV2_2(PikaNodeBase): request=pika_request_data, files=pika_files, content_type="multipart/form-data", - auth_kwargs=kwargs, + auth_kwargs=auth, ) - - return await self.execute_task(initial_operation, auth_kwargs=kwargs, node_id=unique_id) + return await execute_task(initial_operation, auth_kwargs=auth, node_id=cls.hidden.unique_id) -class PikaTextToVideoNodeV2_2(PikaNodeBase): +class PikaTextToVideoNodeV2_2(comfy_io.ComfyNode): """Pika Text2Video v2.2 Node.""" @classmethod - def INPUT_TYPES(cls): - return { - "required": { - **cls.get_base_inputs_types(PikaBodyGenerate22T2vGenerate22T2vPost), - "aspect_ratio": model_field_to_node_input( - IO.FLOAT, - PikaBodyGenerate22T2vGenerate22T2vPost, - "aspectRatio", + def define_schema(cls) -> comfy_io.Schema: + return comfy_io.Schema( + node_id="PikaTextToVideoNode2_2", + display_name="Pika Text to Video", + description="Sends a text prompt to the Pika API v2.2 to generate a video.", + category="api node/video/Pika", + inputs=[ + *get_base_inputs_types(), + comfy_io.Float.Input( + "aspect_ratio", step=0.001, min=0.4, max=2.5, default=1.7777777777777777, - ), - }, - "hidden": { - "auth_token": "AUTH_TOKEN_COMFY_ORG", - "comfy_api_key": "API_KEY_COMFY_ORG", - "unique_id": "UNIQUE_ID", - }, - } + tooltip="Aspect ratio (width / height)", + ) + ], + outputs=[comfy_io.Video.Output()], + hidden=[ + comfy_io.Hidden.auth_token_comfy_org, + comfy_io.Hidden.api_key_comfy_org, + comfy_io.Hidden.unique_id, + ], + is_api_node=True, + ) - DESCRIPTION = "Sends a text prompt to the Pika API v2.2 to generate a video." 
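# NOTE (editor): execute_task raises PikaApiError when task creation fails or when the
# finished task carries no video URL. Hypothetical caller-side handling, shown as a
# standalone coroutine for illustration only (not part of the diff):
async def run_with_error_logging(initial_operation, auth, node_id):
    try:
        (video_output,) = await execute_task(initial_operation, auth_kwargs=auth, node_id=node_id)
    except PikaApiError as exc:
        logging.error("Pika generation failed: %s", exc)
        raise
    return comfy_io.NodeOutput(video_output)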
- - async def api_call( - self, + @classmethod + async def execute( + cls, prompt_text: str, negative_prompt: str, seed: int, resolution: str, duration: int, aspect_ratio: float, - unique_id: str, - **kwargs, - ) -> tuple[VideoFromFile]: + ) -> comfy_io.NodeOutput: + auth = { + "auth_token": cls.hidden.auth_token_comfy_org, + "comfy_api_key": cls.hidden.api_key_comfy_org, + } initial_operation = SynchronousOperation( endpoint=ApiEndpoint( path=PATH_TEXT_TO_VIDEO, @@ -307,62 +307,75 @@ class PikaTextToVideoNodeV2_2(PikaNodeBase): duration=duration, aspectRatio=aspect_ratio, ), - auth_kwargs=kwargs, + auth_kwargs=auth, content_type="application/x-www-form-urlencoded", ) - - return await self.execute_task(initial_operation, auth_kwargs=kwargs, node_id=unique_id) + return await execute_task(initial_operation, auth_kwargs=auth, node_id=cls.hidden.unique_id) -class PikaScenesV2_2(PikaNodeBase): +class PikaScenesV2_2(comfy_io.ComfyNode): """PikaScenes v2.2 Node.""" @classmethod - def INPUT_TYPES(cls): - image_ingredient_input = ( - IO.IMAGE, - {"tooltip": "Image that will be used as ingredient to create a video."}, - ) - return { - "required": { - **cls.get_base_inputs_types( - PikaBodyGenerate22C2vGenerate22PikascenesPost, - ), - "ingredients_mode": model_field_to_node_input( - IO.COMBO, - PikaBodyGenerate22C2vGenerate22PikascenesPost, - "ingredientsMode", - enum_type=IngredientsMode, + def define_schema(cls) -> comfy_io.Schema: + return comfy_io.Schema( + node_id="PikaScenesV2_2", + display_name="Pika Scenes (Video Image Composition)", + description="Combine your images to create a video with the objects in them. Upload multiple images as ingredients and generate a high-quality video that incorporates all of them.", + category="api node/video/Pika", + inputs=[ + *get_base_inputs_types(), + comfy_io.Combo.Input( + "ingredients_mode", + options=["creative", "precise"], default="creative", ), - "aspect_ratio": model_field_to_node_input( - IO.FLOAT, - PikaBodyGenerate22C2vGenerate22PikascenesPost, - "aspectRatio", + comfy_io.Float.Input( + "aspect_ratio", step=0.001, min=0.4, max=2.5, default=1.7777777777777777, + tooltip="Aspect ratio (width / height)", ), - }, - "optional": { - "image_ingredient_1": image_ingredient_input, - "image_ingredient_2": image_ingredient_input, - "image_ingredient_3": image_ingredient_input, - "image_ingredient_4": image_ingredient_input, - "image_ingredient_5": image_ingredient_input, - }, - "hidden": { - "auth_token": "AUTH_TOKEN_COMFY_ORG", - "comfy_api_key": "API_KEY_COMFY_ORG", - "unique_id": "UNIQUE_ID", - }, - } + comfy_io.Image.Input( + "image_ingredient_1", + optional=True, + tooltip="Image that will be used as ingredient to create a video.", + ), + comfy_io.Image.Input( + "image_ingredient_2", + optional=True, + tooltip="Image that will be used as ingredient to create a video.", + ), + comfy_io.Image.Input( + "image_ingredient_3", + optional=True, + tooltip="Image that will be used as ingredient to create a video.", + ), + comfy_io.Image.Input( + "image_ingredient_4", + optional=True, + tooltip="Image that will be used as ingredient to create a video.", + ), + comfy_io.Image.Input( + "image_ingredient_5", + optional=True, + tooltip="Image that will be used as ingredient to create a video.", + ), + ], + outputs=[comfy_io.Video.Output()], + hidden=[ + comfy_io.Hidden.auth_token_comfy_org, + comfy_io.Hidden.api_key_comfy_org, + comfy_io.Hidden.unique_id, + ], + is_api_node=True, + ) - DESCRIPTION = "Combine your images to create a video with the objects in them. 
-
-    async def api_call(
-        self,
+    @classmethod
+    async def execute(
+        cls,
         prompt_text: str,
         negative_prompt: str,
         seed: int,
@@ -370,14 +383,12 @@ class PikaScenesV2_2(PikaNodeBase):
         duration: int,
         ingredients_mode: str,
         aspect_ratio: float,
-        unique_id: str,
         image_ingredient_1: Optional[torch.Tensor] = None,
         image_ingredient_2: Optional[torch.Tensor] = None,
         image_ingredient_3: Optional[torch.Tensor] = None,
         image_ingredient_4: Optional[torch.Tensor] = None,
         image_ingredient_5: Optional[torch.Tensor] = None,
-        **kwargs,
-    ) -> tuple[VideoFromFile]:
+    ) -> comfy_io.NodeOutput:
         # Convert all passed images to BytesIO
         all_image_bytes_io = []
         for image in [
@@ -406,7 +417,10 @@ class PikaScenesV2_2(PikaNodeBase):
             duration=duration,
             aspectRatio=aspect_ratio,
         )
-
+        auth = {
+            "auth_token": cls.hidden.auth_token_comfy_org,
+            "comfy_api_key": cls.hidden.api_key_comfy_org,
+        }
         initial_operation = SynchronousOperation(
             endpoint=ApiEndpoint(
                 path=PATH_PIKASCENES,
@@ -417,63 +431,54 @@ class PikaScenesV2_2(PikaNodeBase):
             request=pika_request_data,
             files=pika_files,
             content_type="multipart/form-data",
-            auth_kwargs=kwargs,
+            auth_kwargs=auth,
         )
-        return await self.execute_task(initial_operation, auth_kwargs=kwargs, node_id=unique_id)
+        return await execute_task(initial_operation, auth_kwargs=auth, node_id=cls.hidden.unique_id)
 
 
-class PikAdditionsNode(PikaNodeBase):
+class PikAdditionsNode(comfy_io.ComfyNode):
     """Pika Pikadditions Node. Add an image into a video."""
 
     @classmethod
-    def INPUT_TYPES(cls):
-        return {
-            "required": {
-                "video": (IO.VIDEO, {"tooltip": "The video to add an image to."}),
-                "image": (IO.IMAGE, {"tooltip": "The image to add to the video."}),
-                "prompt_text": model_field_to_node_input(
-                    IO.STRING,
-                    PikaBodyGeneratePikadditionsGeneratePikadditionsPost,
-                    "promptText",
-                    multiline=True,
-                ),
-                "negative_prompt": model_field_to_node_input(
-                    IO.STRING,
-                    PikaBodyGeneratePikadditionsGeneratePikadditionsPost,
-                    "negativePrompt",
-                    multiline=True,
-                ),
-                "seed": model_field_to_node_input(
-                    IO.INT,
-                    PikaBodyGeneratePikadditionsGeneratePikadditionsPost,
+    def define_schema(cls) -> comfy_io.Schema:
+        return comfy_io.Schema(
+            node_id="Pikadditions",
+            display_name="Pikadditions (Video Object Insertion)",
+            description="Add any object or image into your video. Upload a video and specify what you'd like to add to create a seamlessly integrated result.",
+            category="api node/video/Pika",
+            inputs=[
+                comfy_io.Video.Input("video", tooltip="The video to add an image to."),
+                comfy_io.Image.Input("image", tooltip="The image to add to the video."),
+                comfy_io.String.Input("prompt_text", multiline=True),
+                comfy_io.String.Input("negative_prompt", multiline=True),
+                comfy_io.Int.Input(
                     "seed",
                     min=0,
                     max=0xFFFFFFFF,
                     control_after_generate=True,
                 ),
-            },
-            "hidden": {
-                "auth_token": "AUTH_TOKEN_COMFY_ORG",
-                "comfy_api_key": "API_KEY_COMFY_ORG",
-                "unique_id": "UNIQUE_ID",
-            },
-        }
+            ],
+            outputs=[comfy_io.Video.Output()],
+            hidden=[
+                comfy_io.Hidden.auth_token_comfy_org,
+                comfy_io.Hidden.api_key_comfy_org,
+                comfy_io.Hidden.unique_id,
+            ],
+            is_api_node=True,
+        )
 
-    DESCRIPTION = "Add any object or image into your video. Upload a video and specify what you'd like to add to create a seamlessly integrated result."
-
-    async def api_call(
-        self,
+    @classmethod
+    async def execute(
+        cls,
         video: VideoInput,
         image: torch.Tensor,
         prompt_text: str,
         negative_prompt: str,
         seed: int,
-        unique_id: str,
-        **kwargs,
-    ) -> tuple[VideoFromFile]:
+    ) -> comfy_io.NodeOutput:
         # Convert video to BytesIO
-        video_bytes_io = io.BytesIO()
+        video_bytes_io = BytesIO()
         video.save_to(video_bytes_io, format=VideoContainer.MP4, codec=VideoCodec.H264)
         video_bytes_io.seek(0)
@@ -492,7 +497,10 @@ class PikAdditionsNode(PikaNodeBase):
             negativePrompt=negative_prompt,
             seed=seed,
         )
-
+        auth = {
+            "auth_token": cls.hidden.auth_token_comfy_org,
+            "comfy_api_key": cls.hidden.api_key_comfy_org,
+        }
         initial_operation = SynchronousOperation(
             endpoint=ApiEndpoint(
                 path=PATH_PIKADDITIONS,
@@ -503,74 +511,51 @@ class PikAdditionsNode(PikaNodeBase):
             request=pika_request_data,
             files=pika_files,
             content_type="multipart/form-data",
-            auth_kwargs=kwargs,
+            auth_kwargs=auth,
         )
-        return await self.execute_task(initial_operation, auth_kwargs=kwargs, node_id=unique_id)
+        return await execute_task(initial_operation, auth_kwargs=auth, node_id=cls.hidden.unique_id)
 
 
-class PikaSwapsNode(PikaNodeBase):
+class PikaSwapsNode(comfy_io.ComfyNode):
     """Pika Pikaswaps Node."""
 
     @classmethod
-    def INPUT_TYPES(cls):
-        return {
-            "required": {
-                "video": (IO.VIDEO, {"tooltip": "The video to swap an object in."}),
-                "image": (
-                    IO.IMAGE,
-                    {
-                        "tooltip": "The image used to replace the masked object in the video."
-                    },
-                ),
-                "mask": (
-                    IO.MASK,
-                    {"tooltip": "Use the mask to define areas in the video to replace"},
-                ),
-                "prompt_text": model_field_to_node_input(
-                    IO.STRING,
-                    PikaBodyGeneratePikaswapsGeneratePikaswapsPost,
-                    "promptText",
-                    multiline=True,
-                ),
-                "negative_prompt": model_field_to_node_input(
-                    IO.STRING,
-                    PikaBodyGeneratePikaswapsGeneratePikaswapsPost,
-                    "negativePrompt",
-                    multiline=True,
-                ),
-                "seed": model_field_to_node_input(
-                    IO.INT,
-                    PikaBodyGeneratePikaswapsGeneratePikaswapsPost,
-                    "seed",
-                    min=0,
-                    max=0xFFFFFFFF,
-                    control_after_generate=True,
-                ),
-            },
-            "hidden": {
-                "auth_token": "AUTH_TOKEN_COMFY_ORG",
-                "comfy_api_key": "API_KEY_COMFY_ORG",
-                "unique_id": "UNIQUE_ID",
-            },
-        }
+    def define_schema(cls) -> comfy_io.Schema:
+        return comfy_io.Schema(
+            node_id="Pikaswaps",
+            display_name="Pika Swaps (Video Object Replacement)",
+            description="Swap out any object or region of your video with a new image or object. Define areas to replace either with a mask or coordinates.",
+            category="api node/video/Pika",
+            inputs=[
+                comfy_io.Video.Input("video", tooltip="The video to swap an object in."),
+                comfy_io.Image.Input("image", tooltip="The image used to replace the masked object in the video."),
+                comfy_io.Mask.Input("mask", tooltip="Use the mask to define areas in the video to replace"),
+                comfy_io.String.Input("prompt_text", multiline=True),
+                comfy_io.String.Input("negative_prompt", multiline=True),
+                comfy_io.Int.Input("seed", min=0, max=0xFFFFFFFF, control_after_generate=True),
+            ],
+            outputs=[comfy_io.Video.Output()],
+            hidden=[
+                comfy_io.Hidden.auth_token_comfy_org,
+                comfy_io.Hidden.api_key_comfy_org,
+                comfy_io.Hidden.unique_id,
+            ],
+            is_api_node=True,
+        )
 
-    DESCRIPTION = "Swap out any object or region of your video with a new image or object. Define areas to replace either with a mask or coordinates."
- RETURN_TYPES = ("VIDEO",) - - async def api_call( - self, + @classmethod + async def execute( + cls, video: VideoInput, image: torch.Tensor, mask: torch.Tensor, prompt_text: str, negative_prompt: str, seed: int, - unique_id: str, - **kwargs, - ) -> tuple[VideoFromFile]: + ) -> comfy_io.NodeOutput: # Convert video to BytesIO - video_bytes_io = io.BytesIO() + video_bytes_io = BytesIO() video.save_to(video_bytes_io, format=VideoContainer.MP4, codec=VideoCodec.H264) video_bytes_io.seek(0) @@ -579,7 +564,7 @@ class PikaSwapsNode(PikaNodeBase): mask = mask.repeat(1, 3, 1, 1) # Convert 3-channel binary mask to BytesIO - mask_bytes_io = io.BytesIO() + mask_bytes_io = BytesIO() mask_bytes_io.write(mask.numpy().astype(np.uint8)) mask_bytes_io.seek(0) @@ -599,7 +584,10 @@ class PikaSwapsNode(PikaNodeBase): negativePrompt=negative_prompt, seed=seed, ) - + auth = { + "auth_token": cls.hidden.auth_token_comfy_org, + "comfy_api_key": cls.hidden.api_key_comfy_org, + } initial_operation = SynchronousOperation( endpoint=ApiEndpoint( path=PATH_PIKADDITIONS, @@ -610,71 +598,52 @@ class PikaSwapsNode(PikaNodeBase): request=pika_request_data, files=pika_files, content_type="multipart/form-data", - auth_kwargs=kwargs, + auth_kwargs=auth, ) - - return await self.execute_task(initial_operation, auth_kwargs=kwargs, node_id=unique_id) + return await execute_task(initial_operation, auth_kwargs=auth, node_id=cls.hidden.unique_id) -class PikaffectsNode(PikaNodeBase): +class PikaffectsNode(comfy_io.ComfyNode): """Pika Pikaffects Node.""" @classmethod - def INPUT_TYPES(cls): - return { - "required": { - "image": ( - IO.IMAGE, - {"tooltip": "The reference image to apply the Pikaffect to."}, + def define_schema(cls) -> comfy_io.Schema: + return comfy_io.Schema( + node_id="Pikaffects", + display_name="Pikaffects (Video Effects)", + description="Generate a video with a specific Pikaffect. Supported Pikaffects: Cake-ify, Crumble, Crush, Decapitate, Deflate, Dissolve, Explode, Eye-pop, Inflate, Levitate, Melt, Peel, Poke, Squish, Ta-da, Tear", + category="api node/video/Pika", + inputs=[ + comfy_io.Image.Input("image", tooltip="The reference image to apply the Pikaffect to."), + comfy_io.Combo.Input( + "pikaffect", options=[pikaffect.value for pikaffect in Pikaffect], default="Cake-ify" ), - "pikaffect": model_field_to_node_input( - IO.COMBO, - PikaBodyGeneratePikaffectsGeneratePikaffectsPost, - "pikaffect", - enum_type=Pikaffect, - default="Cake-ify", - ), - "prompt_text": model_field_to_node_input( - IO.STRING, - PikaBodyGeneratePikaffectsGeneratePikaffectsPost, - "promptText", - multiline=True, - ), - "negative_prompt": model_field_to_node_input( - IO.STRING, - PikaBodyGeneratePikaffectsGeneratePikaffectsPost, - "negativePrompt", - multiline=True, - ), - "seed": model_field_to_node_input( - IO.INT, - PikaBodyGeneratePikaffectsGeneratePikaffectsPost, - "seed", - min=0, - max=0xFFFFFFFF, - control_after_generate=True, - ), - }, - "hidden": { - "auth_token": "AUTH_TOKEN_COMFY_ORG", - "comfy_api_key": "API_KEY_COMFY_ORG", - "unique_id": "UNIQUE_ID", - }, - } + comfy_io.String.Input("prompt_text", multiline=True), + comfy_io.String.Input("negative_prompt", multiline=True), + comfy_io.Int.Input("seed", min=0, max=0xFFFFFFFF, control_after_generate=True), + ], + outputs=[comfy_io.Video.Output()], + hidden=[ + comfy_io.Hidden.auth_token_comfy_org, + comfy_io.Hidden.api_key_comfy_org, + comfy_io.Hidden.unique_id, + ], + is_api_node=True, + ) - DESCRIPTION = "Generate a video with a specific Pikaffect. 
-
-    async def api_call(
-        self,
+    @classmethod
+    async def execute(
+        cls,
         image: torch.Tensor,
         pikaffect: str,
         prompt_text: str,
         negative_prompt: str,
         seed: int,
-        unique_id: str,
-        **kwargs,
-    ) -> tuple[VideoFromFile]:
-
+    ) -> comfy_io.NodeOutput:
+        auth = {
+            "auth_token": cls.hidden.auth_token_comfy_org,
+            "comfy_api_key": cls.hidden.api_key_comfy_org,
+        }
         initial_operation = SynchronousOperation(
             endpoint=ApiEndpoint(
                 path=PATH_PIKAFFECTS,
@@ -690,36 +659,38 @@ class PikaffectsNode(PikaNodeBase):
             ),
             files={"image": ("image.png", tensor_to_bytesio(image), "image/png")},
             content_type="multipart/form-data",
-            auth_kwargs=kwargs,
+            auth_kwargs=auth,
         )
-
-        return await self.execute_task(initial_operation, auth_kwargs=kwargs, node_id=unique_id)
+        return await execute_task(initial_operation, auth_kwargs=auth, node_id=cls.hidden.unique_id)
 
 
-class PikaStartEndFrameNode2_2(PikaNodeBase):
+class PikaStartEndFrameNode2_2(comfy_io.ComfyNode):
     """PikaFrames v2.2 Node."""
 
     @classmethod
-    def INPUT_TYPES(cls):
-        return {
-            "required": {
-                "image_start": (IO.IMAGE, {"tooltip": "The first image to combine."}),
-                "image_end": (IO.IMAGE, {"tooltip": "The last image to combine."}),
-                **cls.get_base_inputs_types(
-                    PikaBodyGenerate22KeyframeGenerate22PikaframesPost
-                ),
-            },
-            "hidden": {
-                "auth_token": "AUTH_TOKEN_COMFY_ORG",
-                "comfy_api_key": "API_KEY_COMFY_ORG",
-                "unique_id": "UNIQUE_ID",
-            },
-        }
+    def define_schema(cls) -> comfy_io.Schema:
+        return comfy_io.Schema(
+            node_id="PikaStartEndFrameNode2_2",
+            display_name="Pika Start and End Frame to Video",
+            description="Generate a video by combining your first and last frame. Upload two images to define the start and end points, and let the AI create a smooth transition between them.",
+            category="api node/video/Pika",
+            inputs=[
+                comfy_io.Image.Input("image_start", tooltip="The first image to combine."),
+                comfy_io.Image.Input("image_end", tooltip="The last image to combine."),
+                *get_base_inputs_types(),
+            ],
+            outputs=[comfy_io.Video.Output()],
+            hidden=[
+                comfy_io.Hidden.auth_token_comfy_org,
+                comfy_io.Hidden.api_key_comfy_org,
+                comfy_io.Hidden.unique_id,
+            ],
+            is_api_node=True,
+        )
 
-    DESCRIPTION = "Generate a video by combining your first and last frame. Upload two images to define the start and end points, and let the AI create a smooth transition between them."
-
-    async def api_call(
-        self,
+    @classmethod
+    async def execute(
+        cls,
         image_start: torch.Tensor,
         image_end: torch.Tensor,
         prompt_text: str,
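The same four-line `auth` dictionary is rebuilt inside every `execute` above: hidden inputs declared in `define_schema` no longer arrive through `**kwargs` but are read from `cls.hidden` at execution time. If this were ever factored out, it would reduce to a small helper along these lines (hypothetical `_auth_kwargs` name; the diff deliberately inlines the dict in each method instead):

    # Hypothetical refactor, not part of the diff: the migrated nodes inline this dict.
    @classmethod
    def _auth_kwargs(cls) -> dict:
        return {
            "auth_token": cls.hidden.auth_token_comfy_org,
            "comfy_api_key": cls.hidden.api_key_comfy_org,
        }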
@@ -727,15 +698,15 @@ class PikaStartEndFrameNode2_2(PikaNodeBase):
         seed: int,
         resolution: str,
         duration: int,
-        unique_id: str,
-        **kwargs,
-    ) -> tuple[VideoFromFile]:
-
+    ) -> comfy_io.NodeOutput:
         pika_files = [
             ("keyFrames", ("image_start.png", tensor_to_bytesio(image_start), "image/png")),
             ("keyFrames", ("image_end.png", tensor_to_bytesio(image_end), "image/png")),
         ]
-
+        auth = {
+            "auth_token": cls.hidden.auth_token_comfy_org,
+            "comfy_api_key": cls.hidden.api_key_comfy_org,
+        }
         initial_operation = SynchronousOperation(
             endpoint=ApiEndpoint(
                 path=PATH_PIKAFRAMES,
@@ -752,28 +723,24 @@ class PikaStartEndFrameNode2_2(PikaNodeBase):
             ),
             files=pika_files,
             content_type="multipart/form-data",
-            auth_kwargs=kwargs,
+            auth_kwargs=auth,
         )
-
-        return await self.execute_task(initial_operation, auth_kwargs=kwargs, node_id=unique_id)
+        return await execute_task(initial_operation, auth_kwargs=auth, node_id=cls.hidden.unique_id)
 
 
-NODE_CLASS_MAPPINGS = {
-    "PikaImageToVideoNode2_2": PikaImageToVideoV2_2,
-    "PikaTextToVideoNode2_2": PikaTextToVideoNodeV2_2,
-    "PikaScenesV2_2": PikaScenesV2_2,
-    "Pikadditions": PikAdditionsNode,
-    "Pikaswaps": PikaSwapsNode,
-    "Pikaffects": PikaffectsNode,
-    "PikaStartEndFrameNode2_2": PikaStartEndFrameNode2_2,
-}
+class PikaApiNodesExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[comfy_io.ComfyNode]]:
+        return [
+            PikaImageToVideoV2_2,
+            PikaTextToVideoNodeV2_2,
+            PikaScenesV2_2,
+            PikAdditionsNode,
+            PikaSwapsNode,
+            PikaffectsNode,
+            PikaStartEndFrameNode2_2,
+        ]
 
-NODE_DISPLAY_NAME_MAPPINGS = {
-    "PikaImageToVideoNode2_2": "Pika Image to Video",
-    "PikaTextToVideoNode2_2": "Pika Text to Video",
-    "PikaScenesV2_2": "Pika Scenes (Video Image Composition)",
-    "Pikadditions": "Pikadditions (Video Object Insertion)",
-    "Pikaswaps": "Pika Swaps (Video Object Replacement)",
-    "Pikaffects": "Pikaffects (Video Effects)",
-    "PikaStartEndFrameNode2_2": "Pika Start and End Frame to Video",
-}
+
+async def comfy_entrypoint() -> PikaApiNodesExtension:
+    return PikaApiNodesExtension()
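That closes out the nodes_pika.py migration from the dict-based `INPUT_TYPES` / `NODE_CLASS_MAPPINGS` registration to the schema-based node API: `define_schema` absorbs `INPUT_TYPES`, `DESCRIPTION`, and `RETURN_TYPES`, while a `ComfyExtension` subclass plus `comfy_entrypoint` replaces both mapping dicts. A minimal sketch of the target shape, assuming `comfy_io` and `ComfyExtension` come from `comfy_api.latest` as they do in the other migrated API-node files (the node below is a hypothetical example, not one from this diff):

    from typing_extensions import override
    from comfy_api.latest import ComfyExtension, io as comfy_io

    class MinimalNode(comfy_io.ComfyNode):  # hypothetical example node
        @classmethod
        def define_schema(cls) -> comfy_io.Schema:
            return comfy_io.Schema(
                node_id="MinimalNode",        # was the NODE_CLASS_MAPPINGS key
                display_name="Minimal Node",  # was the NODE_DISPLAY_NAME_MAPPINGS value
                category="api node/video/Pika",
                inputs=[comfy_io.String.Input("prompt_text", multiline=True)],
                outputs=[comfy_io.Video.Output()],
                hidden=[comfy_io.Hidden.unique_id],
                is_api_node=True,
            )

        @classmethod
        async def execute(cls, prompt_text: str) -> comfy_io.NodeOutput:
            # hidden values are attributes of cls.hidden, not **kwargs
            ...

    class MinimalExtension(ComfyExtension):
        @override
        async def get_node_list(self) -> list[type[comfy_io.ComfyNode]]:
            return [MinimalNode]

    async def comfy_entrypoint() -> MinimalExtension:
        return MinimalExtension()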
diff --git a/comfy_api_nodes/nodes_pixverse.py b/comfy_api_nodes/nodes_pixverse.py
index eb98e9653..2c91bbc65 100644
--- a/comfy_api_nodes/nodes_pixverse.py
+++ b/comfy_api_nodes/nodes_pixverse.py
@@ -146,7 +146,7 @@ class PixverseTextToVideoNode(comfy_io.ComfyNode):
                 comfy_io.String.Input(
                     "negative_prompt",
                     default="",
-                    force_input=True,
+                    multiline=True,
                     tooltip="An optional text description of undesired elements on an image.",
                     optional=True,
                 ),
@@ -284,7 +284,7 @@ class PixverseImageToVideoNode(comfy_io.ComfyNode):
                 comfy_io.String.Input(
                     "negative_prompt",
                     default="",
-                    force_input=True,
+                    multiline=True,
                     tooltip="An optional text description of undesired elements on an image.",
                     optional=True,
                 ),
@@ -425,7 +425,7 @@ class PixverseTransitionVideoNode(comfy_io.ComfyNode):
                 comfy_io.String.Input(
                     "negative_prompt",
                     default="",
-                    force_input=True,
+                    multiline=True,
                     tooltip="An optional text description of undesired elements on an image.",
                     optional=True,
                 ),
diff --git a/comfy_api_nodes/nodes_recraft.py b/comfy_api_nodes/nodes_recraft.py
index a006104b7..0bbb551b8 100644
--- a/comfy_api_nodes/nodes_recraft.py
+++ b/comfy_api_nodes/nodes_recraft.py
@@ -107,7 +107,7 @@ def recraft_multipart_parser(data, parent_key=None, formatter: callable=None, co
         # if list already exists exists, just extend list with data
         for check_list in lists_to_check:
             for conv_tuple in check_list:
-                if conv_tuple[0] == parent_key and type(conv_tuple[1]) is list:
+                if conv_tuple[0] == parent_key and isinstance(conv_tuple[1], list):
                     conv_tuple[1].append(formatter(data))
                     return True
         return False
@@ -119,7 +119,7 @@ def recraft_multipart_parser(data, parent_key=None, formatter: callable=None, co
     if formatter is None:
         formatter = lambda v: v # Multipart representation of value
 
-    if type(data) is not dict:
+    if not isinstance(data, dict):
         # if list already exists exists, just extend list with data
         added = handle_converted_lists(data, parent_key, converted_to_check)
         if added:
@@ -136,9 +136,9 @@ def recraft_multipart_parser(data, parent_key=None, formatter: callable=None, co
     for key, value in data.items():
         current_key = key if parent_key is None else f"{parent_key}[{key}]"
 
-        if type(value) is dict:
+        if isinstance(value, dict):
             converted.extend(recraft_multipart_parser(value, current_key, formatter, next_check).items())
-        elif type(value) is list:
+        elif isinstance(value, list):
             for ind, list_value in enumerate(value):
                 iter_key = f"{current_key}[]"
                 converted.extend(recraft_multipart_parser(list_value, iter_key, formatter, next_check, is_list=True).items())
diff --git a/comfy_extras/nodes_audio.py b/comfy_extras/nodes_audio.py
index 51c8b9dd9..1c868fcba 100644
--- a/comfy_extras/nodes_audio.py
+++ b/comfy_extras/nodes_audio.py
@@ -360,7 +360,7 @@ class RecordAudio:
     def load(self, audio):
         audio_path = folder_paths.get_annotated_filepath(audio)
 
-        waveform, sample_rate = torchaudio.load(audio_path)
+        waveform, sample_rate = load(audio_path)
         audio = {"waveform": waveform.unsqueeze(0), "sample_rate": sample_rate}
         return (audio, )
diff --git a/comfyui_version.py b/comfyui_version.py
index ac76fbe35..c3257d4bf 100644
--- a/comfyui_version.py
+++ b/comfyui_version.py
@@ -1,3 +1,3 @@
 # This file is automatically generated by the build process when version is
 # updated in pyproject.toml.
-__version__ = "0.3.62"
+__version__ = "0.3.63"
diff --git a/pyproject.toml b/pyproject.toml
index 383e7d10a..abd1a5f5c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "ComfyUI"
-version = "0.3.62"
+version = "0.3.63"
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.9"
@@ -57,18 +57,14 @@ messages_control.disable = [
     "redefined-builtin",
     "unnecessary-lambda",
     "dangerous-default-value",
+    "invalid-overridden-method",
     # next warnings should be fixed in future
     "bad-classmethod-argument",  # Class method should have 'cls' as first argument
     "wrong-import-order",  # Standard imports should be placed before third party imports
     "logging-fstring-interpolation",  # Use lazy % formatting in logging functions
     "ungrouped-imports",
     "unnecessary-pass",
-    "unidiomatic-typecheck",
     "unnecessary-lambda-assignment",
     "no-else-return",
-    "no-else-raise",
-    "invalid-overridden-method",
     "unused-variable",
-    "pointless-string-statement",
-    "redefined-outer-name",
 ]
diff --git a/requirements.txt b/requirements.txt
index b73afb785..96c712c48 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 comfyui-frontend-package==1.27.7
-comfyui-workflow-templates==0.1.91
+comfyui-workflow-templates==0.1.93
 comfyui-embedded-docs==0.2.6
 comfyui_manager==4.0.2
 torch
@@ -26,6 +26,5 @@ av>=14.2.0
 #non essential dependencies:
 kornia>=0.7.1
 spandrel
-soundfile
 pydantic~=2.0
 pydantic-settings~=2.0
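A note on the `type(...) is ...` to `isinstance(...)` changes in the `nodes_recraft.py` hunks above (and the matching removal of the `unidiomatic-typecheck` suppression from `pyproject.toml`): `isinstance` accepts subclasses while an exact-type check does not, which is almost always what a parser walking user-supplied structures wants. A minimal illustration with hypothetical values:

    from collections import OrderedDict  # a dict subclass

    data = OrderedDict(size="1024x1024")
    print(type(data) is dict)      # False: exact-type check rejects subclasses
    print(isinstance(data, dict))  # True: matches dict and any subclass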