Mirror of https://github.com/comfyanonymous/ComfyUI.git (synced 2026-04-19 06:52:31 +08:00)

Commit 7db5d45147: Merge upstream/master, keep local README.md
@@ -3,10 +3,13 @@ https://www.amd.com/en/resources/support-articles/release-notes/RN-AMDGPU-WINDOW
 
 HOW TO RUN:
 
-if you have a AMD gpu:
+If you have a AMD gpu:
 
 run_amd_gpu.bat
 
+If you have memory issues you can try disabling the smart memory management by running comfyui with:
+
+run_amd_gpu_disable_smart_memory.bat
 
 IF YOU GET A RED ERROR IN THE UI MAKE SURE YOU HAVE A MODEL/CHECKPOINT IN: ComfyUI\models\checkpoints
 
2  .ci/windows_amd_base_files/run_amd_gpu_disable_smart_memory.bat  (new executable file)
@@ -0,0 +1,2 @@
+.\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build --disable-smart-memory
+pause
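Note: --disable-smart-memory is an existing ComfyUI launch flag; the new .bat only forwards it to the embedded Python. On a manual (non-standalone) install the equivalent would presumably be:

    python main.py --disable-smart-memory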
25  .github/workflows/ruff.yml  (vendored)
@@ -21,3 +21,28 @@ jobs:
 
     - name: Run Ruff
       run: ruff check .
+
+  pylint:
+    name: Run Pylint
+    runs-on: ubuntu-latest
+
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v4
+
+    - name: Set up Python
+      uses: actions/setup-python@v4
+      with:
+        python-version: '3.12'
+
+    - name: Install requirements
+      run: |
+        python -m pip install --upgrade pip
+        pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
+        pip install -r requirements.txt
+
+    - name: Install Pylint
+      run: pip install pylint
+
+    - name: Run Pylint
+      run: pylint comfy_api_nodes
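Note: the new CI job can be reproduced locally with the same commands the workflow runs (the CPU-only torch index keeps the install light, since no GPU is needed for linting):

    pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
    pip install -r requirements.txt
    pip install pylint
    pylint comfy_api_nodes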
@@ -23,8 +23,6 @@ class MusicDCAE(torch.nn.Module):
         else:
             self.source_sample_rate = source_sample_rate
 
-        # self.resampler = torchaudio.transforms.Resample(source_sample_rate, 44100)
-
         self.transform = transforms.Compose([
             transforms.Normalize(0.5, 0.5),
         ])
@@ -37,10 +35,6 @@ class MusicDCAE(torch.nn.Module):
         self.scale_factor = 0.1786
         self.shift_factor = -1.9091
 
-    def load_audio(self, audio_path):
-        audio, sr = torchaudio.load(audio_path)
-        return audio, sr
-
     def forward_mel(self, audios):
         mels = []
         for i in range(len(audios)):
@@ -73,10 +67,8 @@ class MusicDCAE(torch.nn.Module):
             latent = self.dcae.encoder(mel.unsqueeze(0))
             latents.append(latent)
         latents = torch.cat(latents, dim=0)
-        # latent_lengths = (audio_lengths / sr * 44100 / 512 / self.time_dimention_multiple).long()
         latents = (latents - self.shift_factor) * self.scale_factor
         return latents
-        # return latents, latent_lengths
 
     @torch.no_grad()
     def decode(self, latents, audio_lengths=None, sr=None):
@@ -91,9 +83,7 @@ class MusicDCAE(torch.nn.Module):
                 wav = self.vocoder.decode(mels[0]).squeeze(1)
 
             if sr is not None:
-                # resampler = torchaudio.transforms.Resample(44100, sr).to(latents.device).to(latents.dtype)
                 wav = torchaudio.functional.resample(wav, 44100, sr)
-                # wav = resampler(wav)
             else:
                 sr = 44100
             pred_wavs.append(wav)
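Note: torchaudio.functional.resample replaces the commented-out torchaudio.transforms.Resample module above; the functional form keeps no state, so there is no resampler object to construct or move across devices and dtypes. A minimal sketch:

    import torch
    import torchaudio

    wav = torch.randn(1, 44100)
    out = torchaudio.functional.resample(wav, orig_freq=44100, new_freq=22050)  # kernel built per call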
@@ -101,7 +91,6 @@ class MusicDCAE(torch.nn.Module):
         if audio_lengths is not None:
             pred_wavs = [wav[:, :length].cpu() for wav, length in zip(pred_wavs, audio_lengths)]
         return torch.stack(pred_wavs)
-        # return sr, pred_wavs
 
     def forward(self, audios, audio_lengths=None, sr=None):
         latents, latent_lengths = self.encode(audios=audios, audio_lengths=audio_lengths, sr=sr)
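Note: encode() normalizes latents as (latents - shift_factor) * scale_factor, so the decode path must apply the inverse mapping for the round trip to hold. A quick sketch of the arithmetic (the inverse shown here is inferred, not quoted from this diff):

    import torch

    scale_factor, shift_factor = 0.1786, -1.9091
    latents = torch.randn(2, 8, 16, 128)
    normalized = (latents - shift_factor) * scale_factor  # as in encode()
    restored = normalized / scale_factor + shift_factor   # inverse mapping
    assert torch.allclose(latents, restored, atol=1e-5)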
@@ -1,7 +1,7 @@
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from comfy.ldm.modules.diffusionmodules.model import ResnetBlock, AttnBlock, VideoConv3d
+from comfy.ldm.modules.diffusionmodules.model import ResnetBlock, AttnBlock, VideoConv3d, Normalize
 import comfy.ops
 import comfy.ldm.models.autoencoder
 ops = comfy.ops.disable_weight_init
@@ -17,11 +17,12 @@ class RMS_norm(nn.Module):
         return F.normalize(x, dim=1) * self.scale * self.gamma
 
 class DnSmpl(nn.Module):
-    def __init__(self, ic, oc, tds=True):
+    def __init__(self, ic, oc, tds=True, refiner_vae=True, op=VideoConv3d):
         super().__init__()
         fct = 2 * 2 * 2 if tds else 1 * 2 * 2
         assert oc % fct == 0
-        self.conv = VideoConv3d(ic, oc // fct, kernel_size=3)
+        self.conv = op(ic, oc // fct, kernel_size=3, stride=1, padding=1)
+        self.refiner_vae = refiner_vae
 
         self.tds = tds
         self.gs = fct * ic // oc
@@ -30,7 +31,7 @@ class DnSmpl(nn.Module):
         r1 = 2 if self.tds else 1
         h = self.conv(x)
 
-        if self.tds:
+        if self.tds and self.refiner_vae:
             hf = h[:, :, :1, :, :]
             b, c, f, ht, wd = hf.shape
             hf = hf.reshape(b, c, f, ht // 2, 2, wd // 2, 2)
@@ -66,6 +67,7 @@ class DnSmpl(nn.Module):
             sc = torch.cat([xf, xn], dim=2)
         else:
             b, c, frms, ht, wd = h.shape
+
             nf = frms // r1
             h = h.reshape(b, c, nf, r1, ht // 2, 2, wd // 2, 2)
             h = h.permute(0, 3, 5, 7, 1, 2, 4, 6)
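Note: the reshape/permute pattern in both branches is a space-to-depth (pixel-unshuffle) move: each 2x2 spatial window (and, when tds is set, a pair of frames) is folded into the channel axis. A standalone sketch of the spatial-only case:

    import torch

    x = torch.randn(1, 4, 8, 16, 16)  # (b, c, f, h, w)
    b, c, f, h, w = x.shape
    y = x.reshape(b, c, f, h // 2, 2, w // 2, 2)  # split each spatial axis into (size/2, 2)
    y = y.permute(0, 4, 6, 1, 2, 3, 5)            # move the two window axes to the front
    y = y.reshape(b, c * 4, f, h // 2, w // 2)    # fold them into channels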
@@ -83,10 +85,11 @@ class DnSmpl(nn.Module):
 
 
 class UpSmpl(nn.Module):
-    def __init__(self, ic, oc, tus=True):
+    def __init__(self, ic, oc, tus=True, refiner_vae=True, op=VideoConv3d):
         super().__init__()
         fct = 2 * 2 * 2 if tus else 1 * 2 * 2
-        self.conv = VideoConv3d(ic, oc * fct, kernel_size=3)
+        self.conv = op(ic, oc * fct, kernel_size=3, stride=1, padding=1)
+        self.refiner_vae = refiner_vae
 
         self.tus = tus
         self.rp = fct * oc // ic
@@ -95,7 +98,7 @@ class UpSmpl(nn.Module):
         r1 = 2 if self.tus else 1
         h = self.conv(x)
 
-        if self.tus:
+        if self.tus and self.refiner_vae:
             hf = h[:, :, :1, :, :]
             b, c, f, ht, wd = hf.shape
             nc = c // (2 * 2)
@@ -148,43 +151,56 @@ class UpSmpl(nn.Module):
 
 class Encoder(nn.Module):
     def __init__(self, in_channels, z_channels, block_out_channels, num_res_blocks,
-                 ffactor_spatial, ffactor_temporal, downsample_match_channel=True, **_):
+                 ffactor_spatial, ffactor_temporal, downsample_match_channel=True, refiner_vae=True, **_):
         super().__init__()
         self.z_channels = z_channels
         self.block_out_channels = block_out_channels
         self.num_res_blocks = num_res_blocks
-        self.conv_in = VideoConv3d(in_channels, block_out_channels[0], 3, 1, 1)
+        self.ffactor_temporal = ffactor_temporal
+
+        self.refiner_vae = refiner_vae
+        if self.refiner_vae:
+            conv_op = VideoConv3d
+            norm_op = RMS_norm
+        else:
+            conv_op = ops.Conv3d
+            norm_op = Normalize
+
+        self.conv_in = conv_op(in_channels, block_out_channels[0], 3, 1, 1)
 
         self.down = nn.ModuleList()
         ch = block_out_channels[0]
         depth = (ffactor_spatial >> 1).bit_length()
-        depth_temporal = ((ffactor_spatial // ffactor_temporal) >> 1).bit_length()
+        depth_temporal = ((ffactor_spatial // self.ffactor_temporal) >> 1).bit_length()
 
         for i, tgt in enumerate(block_out_channels):
            stage = nn.Module()
            stage.block = nn.ModuleList([ResnetBlock(in_channels=ch if j == 0 else tgt,
                                                     out_channels=tgt,
                                                     temb_channels=0,
-                                                     conv_op=VideoConv3d, norm_op=RMS_norm)
+                                                     conv_op=conv_op, norm_op=norm_op)
                                         for j in range(num_res_blocks)])
            ch = tgt
            if i < depth:
                nxt = block_out_channels[i + 1] if i + 1 < len(block_out_channels) and downsample_match_channel else ch
-                stage.downsample = DnSmpl(ch, nxt, tds=i >= depth_temporal)
+                stage.downsample = DnSmpl(ch, nxt, tds=i >= depth_temporal, refiner_vae=self.refiner_vae, op=conv_op)
                ch = nxt
            self.down.append(stage)
 
        self.mid = nn.Module()
-        self.mid.block_1 = ResnetBlock(in_channels=ch, out_channels=ch, temb_channels=0, conv_op=VideoConv3d, norm_op=RMS_norm)
-        self.mid.attn_1 = AttnBlock(ch, conv_op=ops.Conv3d, norm_op=RMS_norm)
-        self.mid.block_2 = ResnetBlock(in_channels=ch, out_channels=ch, temb_channels=0, conv_op=VideoConv3d, norm_op=RMS_norm)
+        self.mid.block_1 = ResnetBlock(in_channels=ch, out_channels=ch, temb_channels=0, conv_op=conv_op, norm_op=norm_op)
+        self.mid.attn_1 = AttnBlock(ch, conv_op=ops.Conv3d, norm_op=norm_op)
+        self.mid.block_2 = ResnetBlock(in_channels=ch, out_channels=ch, temb_channels=0, conv_op=conv_op, norm_op=norm_op)
 
-        self.norm_out = RMS_norm(ch)
-        self.conv_out = VideoConv3d(ch, z_channels << 1, 3, 1, 1)
+        self.norm_out = norm_op(ch)
+        self.conv_out = conv_op(ch, z_channels << 1, 3, 1, 1)
 
        self.regul = comfy.ldm.models.autoencoder.DiagonalGaussianRegularizer()
 
    def forward(self, x):
+        if not self.refiner_vae and x.shape[2] == 1:
+            x = x.expand(-1, -1, self.ffactor_temporal, -1, -1)
+
        x = self.conv_in(x)
 
        for stage in self.down:
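Note: depth sets how many stages downsample spatially, and depth_temporal sets the stage from which temporal downsampling also applies (tds=i >= depth_temporal). Worked example with the refiner configuration that sd.py passes below (ffactor_spatial=16, ffactor_temporal=4):

    def stage_plan(ffactor_spatial, ffactor_temporal):
        depth = (ffactor_spatial >> 1).bit_length()  # number of 2x spatial downsamples
        depth_temporal = ((ffactor_spatial // ffactor_temporal) >> 1).bit_length()
        return [(i, i >= depth_temporal) for i in range(depth)]

    print(stage_plan(16, 4))  # [(0, False), (1, False), (2, True), (3, True)]
    # four spatial halvings (16x), of which the last two also halve time (4x)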
@@ -200,31 +216,42 @@ class Encoder(nn.Module):
         skip = x.view(b, c // grp, grp, t, h, w).mean(2)
 
         out = self.conv_out(F.silu(self.norm_out(x))) + skip
-        out = self.regul(out)[0]
 
-        out = torch.cat((out[:, :, :1], out), dim=2)
-        out = out.permute(0, 2, 1, 3, 4)
-        b, f_times_2, c, h, w = out.shape
-        out = out.reshape(b, f_times_2 // 2, 2 * c, h, w)
-        out = out.permute(0, 2, 1, 3, 4).contiguous()
+        if self.refiner_vae:
+            out = self.regul(out)[0]
+
+            out = torch.cat((out[:, :, :1], out), dim=2)
+            out = out.permute(0, 2, 1, 3, 4)
+            b, f_times_2, c, h, w = out.shape
+            out = out.reshape(b, f_times_2 // 2, 2 * c, h, w)
+            out = out.permute(0, 2, 1, 3, 4).contiguous()
 
         return out
 
 class Decoder(nn.Module):
     def __init__(self, z_channels, out_channels, block_out_channels, num_res_blocks,
-                 ffactor_spatial, ffactor_temporal, upsample_match_channel=True, **_):
+                 ffactor_spatial, ffactor_temporal, upsample_match_channel=True, refiner_vae=True, **_):
         super().__init__()
         block_out_channels = block_out_channels[::-1]
         self.z_channels = z_channels
         self.block_out_channels = block_out_channels
         self.num_res_blocks = num_res_blocks
 
+        self.refiner_vae = refiner_vae
+        if self.refiner_vae:
+            conv_op = VideoConv3d
+            norm_op = RMS_norm
+        else:
+            conv_op = ops.Conv3d
+            norm_op = Normalize
+
         ch = block_out_channels[0]
-        self.conv_in = VideoConv3d(z_channels, ch, 3)
+        self.conv_in = conv_op(z_channels, ch, kernel_size=3, stride=1, padding=1)
 
         self.mid = nn.Module()
-        self.mid.block_1 = ResnetBlock(in_channels=ch, out_channels=ch, temb_channels=0, conv_op=VideoConv3d, norm_op=RMS_norm)
-        self.mid.attn_1 = AttnBlock(ch, conv_op=ops.Conv3d, norm_op=RMS_norm)
-        self.mid.block_2 = ResnetBlock(in_channels=ch, out_channels=ch, temb_channels=0, conv_op=VideoConv3d, norm_op=RMS_norm)
+        self.mid.block_1 = ResnetBlock(in_channels=ch, out_channels=ch, temb_channels=0, conv_op=conv_op, norm_op=norm_op)
+        self.mid.attn_1 = AttnBlock(ch, conv_op=ops.Conv3d, norm_op=norm_op)
+        self.mid.block_2 = ResnetBlock(in_channels=ch, out_channels=ch, temb_channels=0, conv_op=conv_op, norm_op=norm_op)
 
         self.up = nn.ModuleList()
         depth = (ffactor_spatial >> 1).bit_length()
@@ -235,25 +262,26 @@ class Decoder(nn.Module):
             stage.block = nn.ModuleList([ResnetBlock(in_channels=ch if j == 0 else tgt,
                                                      out_channels=tgt,
                                                      temb_channels=0,
-                                                     conv_op=VideoConv3d, norm_op=RMS_norm)
+                                                     conv_op=conv_op, norm_op=norm_op)
                                          for j in range(num_res_blocks + 1)])
             ch = tgt
             if i < depth:
                 nxt = block_out_channels[i + 1] if i + 1 < len(block_out_channels) and upsample_match_channel else ch
-                stage.upsample = UpSmpl(ch, nxt, tus=i < depth_temporal)
+                stage.upsample = UpSmpl(ch, nxt, tus=i < depth_temporal, refiner_vae=self.refiner_vae, op=conv_op)
                 ch = nxt
             self.up.append(stage)
 
-        self.norm_out = RMS_norm(ch)
-        self.conv_out = VideoConv3d(ch, out_channels, 3)
+        self.norm_out = norm_op(ch)
+        self.conv_out = conv_op(ch, out_channels, 3, stride=1, padding=1)
 
     def forward(self, z):
-        z = z.permute(0, 2, 1, 3, 4)
-        b, f, c, h, w = z.shape
-        z = z.reshape(b, f, 2, c // 2, h, w)
-        z = z.permute(0, 1, 2, 3, 4, 5).reshape(b, f * 2, c // 2, h, w)
-        z = z.permute(0, 2, 1, 3, 4)
-        z = z[:, :, 1:]
+        if self.refiner_vae:
+            z = z.permute(0, 2, 1, 3, 4)
+            b, f, c, h, w = z.shape
+            z = z.reshape(b, f, 2, c // 2, h, w)
+            z = z.permute(0, 1, 2, 3, 4, 5).reshape(b, f * 2, c // 2, h, w)
+            z = z.permute(0, 2, 1, 3, 4)
+            z = z[:, :, 1:]
 
         x = self.conv_in(z) + z.repeat_interleave(self.block_out_channels[0] // self.z_channels, 1)
         x = self.mid.block_2(self.mid.attn_1(self.mid.block_1(x)))
@@ -264,4 +292,10 @@ class Decoder(nn.Module):
             if hasattr(stage, 'upsample'):
                 x = stage.upsample(x)
 
-        return self.conv_out(F.silu(self.norm_out(x)))
+        out = self.conv_out(F.silu(self.norm_out(x)))
+
+        if not self.refiner_vae:
+            if z.shape[-3] == 1:
+                out = out[:, :, -1:]
+
+        return out
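Note: taken together, the non-refiner branches give the VAE an image mode: the encoder repeats a single input frame ffactor_temporal times (x.expand), and the decoder keeps only the last output frame when the latent has a single time step. Shape sketch, assuming ffactor_temporal=4 and a 16x spatial factor:

    # encode: (b, 3, 1, H, W) --expand--> (b, 3, 4, H, W) --> latent (b, z, 1, H/16, W/16)
    # decode: (b, z, 1, h, w) --> (b, 3, 4, 16h, 16w) --out[:, :, -1:]--> (b, 3, 1, 16h, 16w)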
@@ -903,7 +903,7 @@ class MotionEncoder_tc(nn.Module):
     def __init__(self,
                  in_dim: int,
                  hidden_dim: int,
-                 num_heads=int,
+                 num_heads: int,
                  need_global=True,
                  dtype=None,
                  device=None,
@@ -468,55 +468,46 @@ class WanVAE(nn.Module):
                               attn_scales, self.temperal_upsample, dropout)
 
     def encode(self, x):
-        self.clear_cache()
+        conv_idx = [0]
+        feat_map = [None] * count_conv3d(self.encoder)
         ## cache
         t = x.shape[2]
         iter_ = 1 + (t - 1) // 4
         ## split the encode input x along time into chunks of 1, 4, 4, 4, ...
         for i in range(iter_):
-            self._enc_conv_idx = [0]
+            conv_idx = [0]
             if i == 0:
                 out = self.encoder(
                     x[:, :, :1, :, :],
-                    feat_cache=self._enc_feat_map,
-                    feat_idx=self._enc_conv_idx)
+                    feat_cache=feat_map,
+                    feat_idx=conv_idx)
             else:
                 out_ = self.encoder(
                     x[:, :, 1 + 4 * (i - 1):1 + 4 * i, :, :],
-                    feat_cache=self._enc_feat_map,
-                    feat_idx=self._enc_conv_idx)
+                    feat_cache=feat_map,
+                    feat_idx=conv_idx)
                 out = torch.cat([out, out_], 2)
         mu, log_var = self.conv1(out).chunk(2, dim=1)
-        self.clear_cache()
         return mu
 
     def decode(self, z):
-        self.clear_cache()
+        conv_idx = [0]
+        feat_map = [None] * count_conv3d(self.decoder)
         # z: [b,c,t,h,w]
 
         iter_ = z.shape[2]
         x = self.conv2(z)
         for i in range(iter_):
-            self._conv_idx = [0]
+            conv_idx = [0]
             if i == 0:
                 out = self.decoder(
                     x[:, :, i:i + 1, :, :],
-                    feat_cache=self._feat_map,
-                    feat_idx=self._conv_idx)
+                    feat_cache=feat_map,
+                    feat_idx=conv_idx)
             else:
                 out_ = self.decoder(
                     x[:, :, i:i + 1, :, :],
-                    feat_cache=self._feat_map,
-                    feat_idx=self._conv_idx)
+                    feat_cache=feat_map,
+                    feat_idx=conv_idx)
                 out = torch.cat([out, out_], 2)
-        self.clear_cache()
         return out
 
-    def clear_cache(self):
-        self._conv_num = count_conv3d(self.decoder)
-        self._conv_idx = [0]
-        self._feat_map = [None] * self._conv_num
-        #cache encode
-        self._enc_conv_num = count_conv3d(self.encoder)
-        self._enc_conv_idx = [0]
-        self._enc_feat_map = [None] * self._enc_conv_num
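Note: the refactor replaces module-level cache attributes (previously set up and torn down by clear_cache) with locals created per call, so nothing leaks between calls and concurrent encodes/decodes no longer share mutable state. The pattern in miniature:

    def encode(self, x):
        conv_idx = [0]                                  # per-call cursor, passed as feat_idx
        feat_map = [None] * count_conv3d(self.encoder)  # per-call feature cache
        ...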
86  comfy/sd.py
@@ -332,35 +332,51 @@ class VAE:
             self.first_stage_model = StageC_coder()
             self.downscale_ratio = 32
             self.latent_channels = 16
-        elif "decoder.conv_in.weight" in sd and sd['decoder.conv_in.weight'].shape[1] == 64:
-            ddconfig = {"block_out_channels": [128, 256, 512, 512, 1024, 1024], "in_channels": 3, "out_channels": 3, "num_res_blocks": 2, "ffactor_spatial": 32, "downsample_match_channel": True, "upsample_match_channel": True}
-            self.latent_channels = ddconfig['z_channels'] = sd["decoder.conv_in.weight"].shape[1]
-            self.downscale_ratio = 32
-            self.upscale_ratio = 32
-            self.working_dtypes = [torch.float16, torch.bfloat16, torch.float32]
-            self.first_stage_model = AutoencodingEngine(regularizer_config={'target': "comfy.ldm.models.autoencoder.DiagonalGaussianRegularizer"},
-                                                        encoder_config={'target': "comfy.ldm.hunyuan_video.vae.Encoder", 'params': ddconfig},
-                                                        decoder_config={'target': "comfy.ldm.hunyuan_video.vae.Decoder", 'params': ddconfig})
-
-            self.memory_used_encode = lambda shape, dtype: (700 * shape[2] * shape[3]) * model_management.dtype_size(dtype)
-            self.memory_used_decode = lambda shape, dtype: (700 * shape[2] * shape[3] * 32 * 32) * model_management.dtype_size(dtype)
-
         elif "decoder.conv_in.weight" in sd:
-            #default SD1.x/SD2.x VAE parameters
-            ddconfig = {'double_z': True, 'z_channels': 4, 'resolution': 256, 'in_channels': 3, 'out_ch': 3, 'ch': 128, 'ch_mult': [1, 2, 4, 4], 'num_res_blocks': 2, 'attn_resolutions': [], 'dropout': 0.0}
-
-            if 'encoder.down.2.downsample.conv.weight' not in sd and 'decoder.up.3.upsample.conv.weight' not in sd: #Stable diffusion x4 upscaler VAE
-                ddconfig['ch_mult'] = [1, 2, 4]
-                self.downscale_ratio = 4
-                self.upscale_ratio = 4
-
-            self.latent_channels = ddconfig['z_channels'] = sd["decoder.conv_in.weight"].shape[1]
-            if 'post_quant_conv.weight' in sd:
-                self.first_stage_model = AutoencoderKL(ddconfig=ddconfig, embed_dim=sd['post_quant_conv.weight'].shape[1])
-            else:
-                self.first_stage_model = AutoencodingEngine(regularizer_config={'target': "comfy.ldm.models.autoencoder.DiagonalGaussianRegularizer"},
-                                                            encoder_config={'target': "comfy.ldm.modules.diffusionmodules.model.Encoder", 'params': ddconfig},
-                                                            decoder_config={'target': "comfy.ldm.modules.diffusionmodules.model.Decoder", 'params': ddconfig})
+            if sd['decoder.conv_in.weight'].shape[1] == 64:
+                ddconfig = {"block_out_channels": [128, 256, 512, 512, 1024, 1024], "in_channels": 3, "out_channels": 3, "num_res_blocks": 2, "ffactor_spatial": 32, "downsample_match_channel": True, "upsample_match_channel": True}
+                self.latent_channels = ddconfig['z_channels'] = sd["decoder.conv_in.weight"].shape[1]
+                self.downscale_ratio = 32
+                self.upscale_ratio = 32
+                self.working_dtypes = [torch.float16, torch.bfloat16, torch.float32]
+                self.first_stage_model = AutoencodingEngine(regularizer_config={'target': "comfy.ldm.models.autoencoder.DiagonalGaussianRegularizer"},
+                                                            encoder_config={'target': "comfy.ldm.hunyuan_video.vae.Encoder", 'params': ddconfig},
+                                                            decoder_config={'target': "comfy.ldm.hunyuan_video.vae.Decoder", 'params': ddconfig})
+
+                self.memory_used_encode = lambda shape, dtype: (700 * shape[2] * shape[3]) * model_management.dtype_size(dtype)
+                self.memory_used_decode = lambda shape, dtype: (700 * shape[2] * shape[3] * 32 * 32) * model_management.dtype_size(dtype)
+            elif sd['decoder.conv_in.weight'].shape[1] == 32:
+                ddconfig = {"block_out_channels": [128, 256, 512, 1024, 1024], "in_channels": 3, "out_channels": 3, "num_res_blocks": 2, "ffactor_spatial": 16, "ffactor_temporal": 4, "downsample_match_channel": True, "upsample_match_channel": True, "refiner_vae": False}
+                self.latent_channels = ddconfig['z_channels'] = sd["decoder.conv_in.weight"].shape[1]
+                self.working_dtypes = [torch.float16, torch.bfloat16, torch.float32]
+                self.upscale_ratio = (lambda a: max(0, a * 4 - 3), 16, 16)
+                self.upscale_index_formula = (4, 16, 16)
+                self.downscale_ratio = (lambda a: max(0, math.floor((a + 3) / 4)), 16, 16)
+                self.downscale_index_formula = (4, 16, 16)
+                self.latent_dim = 3
+                self.not_video = True
+                self.first_stage_model = AutoencodingEngine(regularizer_config={'target': "comfy.ldm.models.autoencoder.DiagonalGaussianRegularizer"},
+                                                            encoder_config={'target': "comfy.ldm.hunyuan_video.vae_refiner.Encoder", 'params': ddconfig},
+                                                            decoder_config={'target': "comfy.ldm.hunyuan_video.vae_refiner.Decoder", 'params': ddconfig})
+
+                self.memory_used_encode = lambda shape, dtype: (2800 * shape[-2] * shape[-1]) * model_management.dtype_size(dtype)
+                self.memory_used_decode = lambda shape, dtype: (2800 * shape[-3] * shape[-2] * shape[-1] * 16 * 16) * model_management.dtype_size(dtype)
+            else:
+                #default SD1.x/SD2.x VAE parameters
+                ddconfig = {'double_z': True, 'z_channels': 4, 'resolution': 256, 'in_channels': 3, 'out_ch': 3, 'ch': 128, 'ch_mult': [1, 2, 4, 4], 'num_res_blocks': 2, 'attn_resolutions': [], 'dropout': 0.0}
+
+                if 'encoder.down.2.downsample.conv.weight' not in sd and 'decoder.up.3.upsample.conv.weight' not in sd: #Stable diffusion x4 upscaler VAE
+                    ddconfig['ch_mult'] = [1, 2, 4]
+                    self.downscale_ratio = 4
+                    self.upscale_ratio = 4
+
+                self.latent_channels = ddconfig['z_channels'] = sd["decoder.conv_in.weight"].shape[1]
+                if 'post_quant_conv.weight' in sd:
+                    self.first_stage_model = AutoencoderKL(ddconfig=ddconfig, embed_dim=sd['post_quant_conv.weight'].shape[1])
+                else:
+                    self.first_stage_model = AutoencodingEngine(regularizer_config={'target': "comfy.ldm.models.autoencoder.DiagonalGaussianRegularizer"},
+                                                                encoder_config={'target': "comfy.ldm.modules.diffusionmodules.model.Encoder", 'params': ddconfig},
+                                                                decoder_config={'target': "comfy.ldm.modules.diffusionmodules.model.Decoder", 'params': ddconfig})
         elif "decoder.layers.1.layers.0.beta" in sd:
             self.first_stage_model = AudioOobleckVAE()
             self.memory_used_encode = lambda shape, dtype: (1000 * shape[2]) * model_management.dtype_size(dtype)
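Note: the 4x temporal factor uses asymmetric formulas because the first frame maps 1:1, so valid pixel frame counts are of the form 4k + 1. Quick check of the lambda pair used above:

    import math

    up = lambda a: max(0, a * 4 - 3)                  # latent frames -> pixel frames
    down = lambda a: max(0, math.floor((a + 3) / 4))  # pixel frames -> latent frames

    print([down(t) for t in (1, 5, 9, 17)])  # [1, 2, 3, 5]
    assert all(up(down(t)) == t for t in (1, 5, 9, 17))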
@@ -636,6 +652,7 @@ class VAE:
     def decode(self, samples_in, vae_options={}):
         self.throw_exception_if_invalid()
         pixel_samples = None
+        do_tile = False
         try:
             memory_used = self.memory_used_decode(samples_in.shape, self.vae_dtype)
             model_management.load_models_gpu([self.patcher], memory_required=memory_used, force_full_load=self.disable_offload)
@@ -651,6 +668,13 @@ class VAE:
                     pixel_samples[x:x+batch_number] = out
         except model_management.OOM_EXCEPTION:
             logging.warning("Warning: Ran out of memory when regular VAE decoding, retrying with tiled VAE decoding.")
+            #NOTE: We don't know what tensors were allocated to stack variables at the time of the
+            #exception and the exception itself refs them all until we get out of this except block.
+            #So we just set a flag for tiler fallback so that tensor gc can happen once the
+            #exception is fully off the books.
+            do_tile = True
+
+        if do_tile:
             dims = samples_in.ndim - 2
             if dims == 1 or self.extra_1d_channel is not None:
                 pixel_samples = self.decode_tiled_1d(samples_in)
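Note: the NOTE comment is the core of this change: retrying the tiled path inside the except block would keep every tensor referenced by the live exception alive during the retry. Reduced to a sketch (full_decode/tiled_decode are hypothetical placeholders):

    do_tile = False
    try:
        out = full_decode(samples)   # may OOM
    except OOM_EXCEPTION:
        do_tile = True               # record the failure only; allocate nothing here
    if do_tile:                      # the exception and its tensor refs are gone by now
        out = tiled_decode(samples)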
@@ -697,6 +721,7 @@ class VAE:
         self.throw_exception_if_invalid()
         pixel_samples = self.vae_encode_crop_pixels(pixel_samples)
         pixel_samples = pixel_samples.movedim(-1, 1)
+        do_tile = False
         if self.latent_dim == 3 and pixel_samples.ndim < 5:
             if not self.not_video:
                 pixel_samples = pixel_samples.movedim(1, 0).unsqueeze(0)
@@ -718,6 +743,13 @@ class VAE:
 
         except model_management.OOM_EXCEPTION:
             logging.warning("Warning: Ran out of memory when regular VAE encoding, retrying with tiled VAE encoding.")
+            #NOTE: We don't know what tensors were allocated to stack variables at the time of the
+            #exception and the exception itself refs them all until we get out of this except block.
+            #So we just set a flag for tiler fallback so that tensor gc can happen once the
+            #exception is fully off the books.
+            do_tile = True
+
+        if do_tile:
             if self.latent_dim == 3:
                 tile = 256
                 overlap = tile // 4
@@ -1605,6 +1605,7 @@ class _IO:
     Model = Model
     ClipVision = ClipVision
     ClipVisionOutput = ClipVisionOutput
+    AudioEncoder = AudioEncoder
     AudioEncoderOutput = AudioEncoderOutput
     StyleModel = StyleModel
     Gligen = Gligen
@@ -152,7 +152,7 @@ def validate_aspect_ratio(
         raise TypeError(
             f"Aspect ratio cannot reduce to any less than {minimum_ratio_str} ({minimum_ratio}), but was {aspect_ratio} ({calculated_ratio})."
         )
-    elif calculated_ratio > maximum_ratio:
+    if calculated_ratio > maximum_ratio:
         raise TypeError(
             f"Aspect ratio cannot reduce to any greater than {maximum_ratio_str} ({maximum_ratio}), but was {aspect_ratio} ({calculated_ratio})."
         )
3  comfy_api_nodes/apis/__init__.py  (generated)
@@ -2,6 +2,7 @@
 # filename: filtered-openapi.yaml
 # timestamp: 2025-07-30T08:54:00+00:00
 
+# pylint: disable
 from __future__ import annotations
 
 from datetime import date, datetime
@@ -1320,6 +1321,7 @@ class KlingTextToVideoModelName(str, Enum):
     kling_v1 = 'kling-v1'
     kling_v1_6 = 'kling-v1-6'
     kling_v2_1_master = 'kling-v2-1-master'
+    kling_v2_5_turbo = 'kling-v2-5-turbo'
 
 
 class KlingVideoGenAspectRatio(str, Enum):
@@ -1354,6 +1356,7 @@ class KlingVideoGenModelName(str, Enum):
     kling_v2_master = 'kling-v2-master'
     kling_v2_1 = 'kling-v2-1'
     kling_v2_1_master = 'kling-v2-1-master'
+    kling_v2_5_turbo = 'kling-v2-5-turbo'
 
 
 class KlingVideoResult(BaseModel):
@@ -95,6 +95,7 @@ import aiohttp
 import asyncio
 import logging
 import io
+import os
 import socket
 from aiohttp.client_exceptions import ClientError, ClientResponseError
 from typing import Dict, Type, Optional, Any, TypeVar, Generic, Callable, Tuple
@@ -499,7 +500,9 @@ class ApiClient:
         else:
             raise ValueError("File must be BytesIO or str path")
 
-        operation_id = f"upload_{upload_url.split('/')[-1]}_{uuid.uuid4().hex[:8]}"
+        parsed = urlparse(upload_url)
+        basename = os.path.basename(parsed.path) or parsed.netloc or "upload"
+        operation_id = f"upload_{basename}_{uuid.uuid4().hex[:8]}"
         request_logger.log_request_response(
             operation_id=operation_id,
             request_method="PUT",
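Note: the old upload_url.split('/')[-1] kept any query string (for example a presigned-URL signature) in the operation id, which then flowed into the log filename; the urlparse version extracts only the path basename:

    import os
    from urllib.parse import urlparse

    url = "https://bucket.s3.amazonaws.com/some/key.png?X-Amz-Signature=abc123"
    print(url.split('/')[-1])                    # key.png?X-Amz-Signature=abc123
    print(os.path.basename(urlparse(url).path))  # key.png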
@@ -532,7 +535,7 @@ class ApiClient:
                 request_method="PUT",
                 request_url=upload_url,
                 response_status_code=e.status if hasattr(e, "status") else None,
-                response_headers=dict(e.headers) if getattr(e, "headers") else None,
+                response_headers=dict(e.headers) if hasattr(e, "headers") else None,
                 response_content=None,
                 error_message=f"{type(e).__name__}: {str(e)}",
             )
@@ -4,16 +4,18 @@ import os
 import datetime
 import json
 import logging
+import re
+import hashlib
+from typing import Any
 
 import folder_paths
 
 # Get the logger instance
 logger = logging.getLogger(__name__)
 
 
 def get_log_directory():
-    """
-    Ensures the API log directory exists within ComfyUI's temp directory
-    and returns its path.
-    """
+    """Ensures the API log directory exists within ComfyUI's temp directory and returns its path."""
     base_temp_dir = folder_paths.get_temp_directory()
     log_dir = os.path.join(base_temp_dir, "api_logs")
     try:
@@ -24,42 +26,77 @@ def get_log_directory():
             return base_temp_dir
     return log_dir
 
-def _format_data_for_logging(data):
+
+def _sanitize_filename_component(name: str) -> str:
+    if not name:
+        return "log"
+    sanitized = re.sub(r"[^A-Za-z0-9._-]+", "_", name)  # Replace disallowed characters with underscore
+    sanitized = sanitized.strip(" ._")  # Windows: trailing dots or spaces are not allowed
+    if not sanitized:
+        sanitized = "log"
+    return sanitized
+
+
+def _short_hash(*parts: str, length: int = 10) -> str:
+    return hashlib.sha1(("|".join(parts)).encode("utf-8")).hexdigest()[:length]
+
+
+def _build_log_filepath(log_dir: str, operation_id: str, request_url: str) -> str:
+    """Build log filepath. We keep it well under common path length limits aiming for <= 240 characters total."""
+    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S_%f")
+    slug = _sanitize_filename_component(operation_id)  # Best-effort human-readable slug from operation_id
+    h = _short_hash(operation_id or "", request_url or "")  # Short hash ties log to the full operation and URL
+
+    # Compute how much room we have for the slug given the directory length
+    # Keep total path length reasonably below ~260 on Windows.
+    max_total_path = 240
+    prefix = f"{timestamp}_"
+    suffix = f"_{h}.log"
+    if not slug:
+        slug = "op"
+    max_filename_len = max(60, max_total_path - len(log_dir) - 1)
+    max_slug_len = max(8, max_filename_len - len(prefix) - len(suffix))
+    if len(slug) > max_slug_len:
+        slug = slug[:max_slug_len].rstrip(" ._-")
+    return os.path.join(log_dir, f"{prefix}{slug}{suffix}")
+
+
+def _format_data_for_logging(data: Any) -> str:
     """Helper to format data (dict, str, bytes) for logging."""
     if isinstance(data, bytes):
         try:
-            return data.decode('utf-8') # Try to decode as text
+            return data.decode("utf-8")  # Try to decode as text
         except UnicodeDecodeError:
             return f"[Binary data of length {len(data)} bytes]"
     elif isinstance(data, (dict, list)):
         try:
             return json.dumps(data, indent=2, ensure_ascii=False)
         except TypeError:
             return str(data)  # Fallback for non-serializable objects
     return str(data)
 
+
 def log_request_response(
     operation_id: str,
     request_method: str,
     request_url: str,
     request_headers: dict | None = None,
     request_params: dict | None = None,
-    request_data: any = None,
+    request_data: Any = None,
     response_status_code: int | None = None,
     response_headers: dict | None = None,
-    response_content: any = None,
-    error_message: str | None = None
+    response_content: Any = None,
+    error_message: str | None = None,
 ):
     """
     Logs API request and response details to a file in the temp/api_logs directory.
+    Filenames are sanitized and length-limited for cross-platform safety.
+    If we still fail to write, we fall back to appending into api.log.
     """
     log_dir = get_log_directory()
-    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S_%f")
-    filename = f"{timestamp}_{operation_id.replace('/', '_').replace(':', '_')}.log"
-    filepath = os.path.join(log_dir, filename)
-
-    log_content = []
+    filepath = _build_log_filepath(log_dir, operation_id, request_url)
 
+    log_content: list[str] = []
     log_content.append(f"Timestamp: {datetime.datetime.now().isoformat()}")
     log_content.append(f"Operation ID: {operation_id}")
     log_content.append("-" * 30 + " REQUEST " + "-" * 30)
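Note: the 240-character budget is simple arithmetic over the fixed parts: with a 26-character log_dir, a timestamp prefix of 23 characters ("%Y%m%d_%H%M%S_%f" plus "_") and a 15-character "_<10-hex>.log" suffix, max_filename_len = max(60, 240 - 26 - 1) = 213 and max_slug_len = max(8, 213 - 23 - 15) = 175, so only very long operation ids are truncated. A quick check, assuming the helpers above (values illustrative):

    p = _build_log_filepath("/tmp/comfyui_temp/api_logs", "op:" + "A" * 300, "https://api.example.com/v1/upload")
    assert len(p) <= 240 and p.endswith(".log")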
@@ -69,7 +106,7 @@ def log_request_response(
         log_content.append(f"Headers:\n{_format_data_for_logging(request_headers)}")
     if request_params:
         log_content.append(f"Params:\n{_format_data_for_logging(request_params)}")
-    if request_data:
+    if request_data is not None:
         log_content.append(f"Data/Body:\n{_format_data_for_logging(request_data)}")
 
     log_content.append("\n" + "-" * 30 + " RESPONSE " + "-" * 30)
@@ -77,7 +114,7 @@ def log_request_response(
         log_content.append(f"Status Code: {response_status_code}")
     if response_headers:
         log_content.append(f"Headers:\n{_format_data_for_logging(response_headers)}")
-    if response_content:
+    if response_content is not None:
         log_content.append(f"Content:\n{_format_data_for_logging(response_content)}")
     if error_message:
         log_content.append(f"Error:\n{error_message}")
@@ -89,6 +126,7 @@ def log_request_response(
     except Exception as e:
         logger.error(f"Error writing API log to {filepath}: {e}")
 
+
 if __name__ == '__main__':
     # Example usage (for testing the logger directly)
     logger.setLevel(logging.DEBUG)
@@ -52,7 +52,3 @@ class RodinResourceItem(BaseModel):
 
 class Rodin3DDownloadResponse(BaseModel):
     list: List[RodinResourceItem] = Field(..., description="Source List")
-
-
-
-
@@ -39,6 +39,7 @@ from comfy_api_nodes.apinode_utils import (
     tensor_to_base64_string,
     bytesio_to_image_tensor,
 )
+from comfy_api.util import VideoContainer, VideoCodec
 
 
 GEMINI_BASE_ENDPOINT = "/proxy/vertexai/gemini"
@@ -310,7 +311,7 @@ class GeminiNode(ComfyNodeABC):
         Returns:
             List of GeminiPart objects containing the encoded video.
         """
-        from comfy_api.util import VideoContainer, VideoCodec
+
         base_64_string = video_to_base64_string(
             video_input,
             container_format=VideoContainer.MP4,
@@ -490,7 +491,6 @@ class GeminiInputFiles(ComfyNodeABC):
         # Use base64 string directly, not the data URI
         with open(file_path, "rb") as f:
             file_content = f.read()
-        import base64
         base64_str = base64.b64encode(file_content).decode("utf-8")
 
         return GeminiPart(
(File diff suppressed because it is too large.)
@@ -2,11 +2,7 @@ import logging
 from typing import Any, Callable, Optional, TypeVar
 
 import torch
 from typing_extensions import override
-from comfy_api_nodes.util.validation_utils import (
-    get_image_dimensions,
-    validate_image_dimensions,
-)
-
+from comfy_api_nodes.util.validation_utils import validate_image_dimensions
 
 from comfy_api_nodes.apis import (
     MoonvalleyTextToVideoRequest,
@@ -132,47 +128,6 @@ def validate_prompts(
     return True
 
 
-def validate_input_media(width, height, with_frame_conditioning, num_frames_in=None):
-    # inference validation
-    # T = num_frames
-    # in all cases, the following must be true: T divisible by 16 and H,W by 8. in addition...
-    # with image conditioning: H*W must be divisible by 8192
-    # without image conditioning: T divisible by 32
-    if num_frames_in and not num_frames_in % 16 == 0:
-        return False, ("The input video total frame count must be divisible by 16!")
-
-    if height % 8 != 0 or width % 8 != 0:
-        return False, (
-            f"Height ({height}) and width ({width}) must be " "divisible by 8"
-        )
-
-    if with_frame_conditioning:
-        if (height * width) % 8192 != 0:
-            return False, (
-                f"Height * width ({height * width}) must be "
-                "divisible by 8192 for frame conditioning"
-            )
-    else:
-        if num_frames_in and not num_frames_in % 32 == 0:
-            return False, ("The input video total frame count must be divisible by 32!")
-
-
-def validate_input_image(
-    image: torch.Tensor, with_frame_conditioning: bool = False
-) -> None:
-    """
-    Validates the input image adheres to the expectations of the API:
-    - The image resolution should not be less than 300*300px
-    - The aspect ratio of the image should be between 1:2.5 ~ 2.5:1
-
-    """
-    height, width = get_image_dimensions(image)
-    validate_input_media(width, height, with_frame_conditioning)
-    validate_image_dimensions(
-        image, min_width=300, min_height=300, max_height=MAX_HEIGHT, max_width=MAX_WIDTH
-    )
-
-
 def validate_video_to_video_input(video: VideoInput) -> VideoInput:
     """
     Validates and processes video input for Moonvalley Video-to-Video generation.
@@ -499,7 +454,7 @@ class MoonvalleyImg2VideoNode(comfy_io.ComfyNode):
         seed: int,
         steps: int,
     ) -> comfy_io.NodeOutput:
-        validate_input_image(image, True)
+        validate_image_dimensions(image, min_width=300, min_height=300, max_height=MAX_HEIGHT, max_width=MAX_WIDTH)
         validate_prompts(prompt, negative_prompt, MOONVALLEY_MAREY_MAX_PROMPT_LENGTH)
         width_height = parse_width_height_from_res(resolution)
 
@@ -518,7 +473,7 @@ class MoonvalleyImg2VideoNode(comfy_io.ComfyNode):
             height=width_height["height"],
             use_negative_prompts=True,
         )
-        """Upload image to comfy backend to have a URL available for further processing"""
+
         # Get MIME type from tensor - assuming PNG format for image tensors
         mime_type = "image/png"
 
@@ -636,7 +591,6 @@ class MoonvalleyVideo2VideoNode(comfy_io.ComfyNode):
         validated_video = validate_video_to_video_input(video)
         video_url = await upload_video_to_comfyapi(validated_video, auth_kwargs=auth)
 
-        """Validate prompts and inference input"""
         validate_prompts(prompt, negative_prompt)
 
         # Only include motion_intensity for Motion Transfer
(File diff suppressed because it is too large.)
@@ -1,5 +1,7 @@
 from inspect import cleandoc
 from typing import Optional
+from typing_extensions import override
+from io import BytesIO
 from comfy_api_nodes.apis.pixverse_api import (
     PixverseTextVideoRequest,
     PixverseImageVideoRequest,
@@ -26,12 +28,11 @@ from comfy_api_nodes.apinode_utils import (
     tensor_to_bytesio,
     validate_string,
 )
-from comfy.comfy_types.node_typing import IO, ComfyNodeABC
 from comfy_api.input_impl import VideoFromFile
+from comfy_api.latest import ComfyExtension, io as comfy_io
 
 import torch
 import aiohttp
-from io import BytesIO
 
 
 AVERAGE_DURATION_T2V = 32
@@ -72,100 +73,101 @@ async def upload_image_to_pixverse(image: torch.Tensor, auth_kwargs=None):
     return response_upload.Resp.img_id
 
 
-class PixverseTemplateNode:
+class PixverseTemplateNode(comfy_io.ComfyNode):
     """
     Select template for PixVerse Video generation.
     """
 
-    RETURN_TYPES = (PixverseIO.TEMPLATE,)
-    RETURN_NAMES = ("pixverse_template",)
-    FUNCTION = "create_template"
-    CATEGORY = "api node/video/PixVerse"
-
     @classmethod
-    def INPUT_TYPES(s):
-        return {
-            "required": {
-                "template": (list(pixverse_templates.keys()),),
-            }
-        }
+    def define_schema(cls) -> comfy_io.Schema:
+        return comfy_io.Schema(
+            node_id="PixverseTemplateNode",
+            display_name="PixVerse Template",
+            category="api node/video/PixVerse",
+            inputs=[
+                comfy_io.Combo.Input("template", options=[list(pixverse_templates.keys())]),
+            ],
+            outputs=[comfy_io.Custom(PixverseIO.TEMPLATE).Output(display_name="pixverse_template")],
+        )
 
-    def create_template(self, template: str):
+    @classmethod
+    def execute(cls, template: str) -> comfy_io.NodeOutput:
         template_id = pixverse_templates.get(template, None)
         if template_id is None:
             raise Exception(f"Template '{template}' is not recognized.")
         # just return the integer
-        return (template_id,)
+        return comfy_io.NodeOutput(template_id)
 
 
-class PixverseTextToVideoNode(ComfyNodeABC):
+class PixverseTextToVideoNode(comfy_io.ComfyNode):
     """
     Generates videos based on prompt and output_size.
     """
 
-    RETURN_TYPES = (IO.VIDEO,)
-    DESCRIPTION = cleandoc(__doc__ or "") # Handle potential None value
-    FUNCTION = "api_call"
-    API_NODE = True
-    CATEGORY = "api node/video/PixVerse"
-
     @classmethod
-    def INPUT_TYPES(s):
-        return {
-            "required": {
-                "prompt": (
-                    IO.STRING,
-                    {
-                        "multiline": True,
-                        "default": "",
-                        "tooltip": "Prompt for the video generation",
-                    },
-                ),
-                "aspect_ratio": ([ratio.value for ratio in PixverseAspectRatio],),
-                "quality": (
-                    [resolution.value for resolution in PixverseQuality],
-                    {
-                        "default": PixverseQuality.res_540p,
-                    },
-                ),
-                "duration_seconds": ([dur.value for dur in PixverseDuration],),
-                "motion_mode": ([mode.value for mode in PixverseMotionMode],),
-                "seed": (
-                    IO.INT,
-                    {
-                        "default": 0,
-                        "min": 0,
-                        "max": 2147483647,
-                        "control_after_generate": True,
-                        "tooltip": "Seed for video generation.",
-                    },
-                ),
-            },
-            "optional": {
-                "negative_prompt": (
-                    IO.STRING,
-                    {
-                        "default": "",
-                        "forceInput": True,
-                        "tooltip": "An optional text description of undesired elements on an image.",
-                    },
-                ),
-                "pixverse_template": (
-                    PixverseIO.TEMPLATE,
-                    {
-                        "tooltip": "An optional template to influence style of generation, created by the PixVerse Template node."
-                    },
-                ),
-            },
-            "hidden": {
-                "auth_token": "AUTH_TOKEN_COMFY_ORG",
-                "comfy_api_key": "API_KEY_COMFY_ORG",
-                "unique_id": "UNIQUE_ID",
-            },
-        }
+    def define_schema(cls) -> comfy_io.Schema:
+        return comfy_io.Schema(
+            node_id="PixverseTextToVideoNode",
+            display_name="PixVerse Text to Video",
+            category="api node/video/PixVerse",
+            description=cleandoc(cls.__doc__ or ""),
+            inputs=[
+                comfy_io.String.Input(
+                    "prompt",
+                    multiline=True,
+                    default="",
+                    tooltip="Prompt for the video generation",
+                ),
+                comfy_io.Combo.Input(
+                    "aspect_ratio",
+                    options=[ratio.value for ratio in PixverseAspectRatio],
+                ),
+                comfy_io.Combo.Input(
+                    "quality",
+                    options=[resolution.value for resolution in PixverseQuality],
+                    default=PixverseQuality.res_540p,
+                ),
+                comfy_io.Combo.Input(
+                    "duration_seconds",
+                    options=[dur.value for dur in PixverseDuration],
+                ),
+                comfy_io.Combo.Input(
+                    "motion_mode",
+                    options=[mode.value for mode in PixverseMotionMode],
+                ),
+                comfy_io.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=2147483647,
+                    control_after_generate=True,
+                    tooltip="Seed for video generation.",
+                ),
+                comfy_io.String.Input(
+                    "negative_prompt",
+                    default="",
+                    multiline=True,
+                    tooltip="An optional text description of undesired elements on an image.",
+                    optional=True,
+                ),
+                comfy_io.Custom(PixverseIO.TEMPLATE).Input(
+                    "pixverse_template",
+                    tooltip="An optional template to influence style of generation, created by the PixVerse Template node.",
+                    optional=True,
+                ),
+            ],
+            outputs=[comfy_io.Video.Output()],
+            hidden=[
+                comfy_io.Hidden.auth_token_comfy_org,
+                comfy_io.Hidden.api_key_comfy_org,
+                comfy_io.Hidden.unique_id,
+            ],
+            is_api_node=True,
+        )
 
-    async def api_call(
-        self,
+    @classmethod
+    async def execute(
+        cls,
         prompt: str,
         aspect_ratio: str,
         quality: str,
@ -174,9 +176,7 @@ class PixverseTextToVideoNode(ComfyNodeABC):
|
|||||||
seed,
|
seed,
|
||||||
negative_prompt: str = None,
|
negative_prompt: str = None,
|
||||||
pixverse_template: int = None,
|
pixverse_template: int = None,
|
||||||
unique_id: Optional[str] = None,
|
) -> comfy_io.NodeOutput:
|
||||||
**kwargs,
|
|
||||||
):
|
|
||||||
validate_string(prompt, strip_whitespace=False)
|
validate_string(prompt, strip_whitespace=False)
|
||||||
# 1080p is limited to 5 seconds duration
|
# 1080p is limited to 5 seconds duration
|
||||||
# only normal motion_mode supported for 1080p or for non-5 second duration
|
# only normal motion_mode supported for 1080p or for non-5 second duration
|
||||||
@ -186,6 +186,10 @@ class PixverseTextToVideoNode(ComfyNodeABC):
|
|||||||
elif duration_seconds != PixverseDuration.dur_5:
|
elif duration_seconds != PixverseDuration.dur_5:
|
||||||
motion_mode = PixverseMotionMode.normal
|
motion_mode = PixverseMotionMode.normal
|
||||||
|
|
||||||
|
auth = {
|
||||||
|
"auth_token": cls.hidden.auth_token_comfy_org,
|
||||||
|
"comfy_api_key": cls.hidden.api_key_comfy_org,
|
||||||
|
}
|
||||||
operation = SynchronousOperation(
|
operation = SynchronousOperation(
|
||||||
endpoint=ApiEndpoint(
|
endpoint=ApiEndpoint(
|
||||||
path="/proxy/pixverse/video/text/generate",
|
path="/proxy/pixverse/video/text/generate",
|
||||||
@ -203,7 +207,7 @@ class PixverseTextToVideoNode(ComfyNodeABC):
|
|||||||
template_id=pixverse_template,
|
template_id=pixverse_template,
|
||||||
seed=seed,
|
seed=seed,
|
||||||
),
|
),
|
||||||
auth_kwargs=kwargs,
|
auth_kwargs=auth,
|
||||||
)
|
)
|
||||||
response_api = await operation.execute()
|
response_api = await operation.execute()
|
||||||
|
|
||||||
@ -224,8 +228,8 @@ class PixverseTextToVideoNode(ComfyNodeABC):
|
|||||||
PixverseStatus.deleted,
|
PixverseStatus.deleted,
|
||||||
],
|
],
|
||||||
status_extractor=lambda x: x.Resp.status,
|
status_extractor=lambda x: x.Resp.status,
|
||||||
auth_kwargs=kwargs,
|
auth_kwargs=auth,
|
||||||
node_id=unique_id,
|
node_id=cls.hidden.unique_id,
|
||||||
result_url_extractor=get_video_url_from_response,
|
result_url_extractor=get_video_url_from_response,
|
||||||
estimated_duration=AVERAGE_DURATION_T2V,
|
estimated_duration=AVERAGE_DURATION_T2V,
|
||||||
)
|
)
|
||||||
@ -233,77 +237,75 @@ class PixverseTextToVideoNode(ComfyNodeABC):
|
|||||||
|
|
||||||
async with aiohttp.ClientSession() as session:
|
async with aiohttp.ClientSession() as session:
|
||||||
async with session.get(response_poll.Resp.url) as vid_response:
|
async with session.get(response_poll.Resp.url) as vid_response:
|
||||||
return (VideoFromFile(BytesIO(await vid_response.content.read())),)
|
return comfy_io.NodeOutput(VideoFromFile(BytesIO(await vid_response.content.read())))
|
||||||
|
|
||||||
|
|
||||||
-class PixverseImageToVideoNode(ComfyNodeABC):
+class PixverseImageToVideoNode(comfy_io.ComfyNode):
     """
     Generates videos based on prompt and output_size.
     """

-    RETURN_TYPES = (IO.VIDEO,)
-    DESCRIPTION = cleandoc(__doc__ or "")  # Handle potential None value
-    FUNCTION = "api_call"
-    API_NODE = True
-    CATEGORY = "api node/video/PixVerse"
+    @classmethod
+    def define_schema(cls) -> comfy_io.Schema:
+        return comfy_io.Schema(
+            node_id="PixverseImageToVideoNode",
+            display_name="PixVerse Image to Video",
+            category="api node/video/PixVerse",
+            description=cleandoc(cls.__doc__ or ""),
+            inputs=[
+                comfy_io.Image.Input("image"),
+                comfy_io.String.Input(
+                    "prompt",
+                    multiline=True,
+                    default="",
+                    tooltip="Prompt for the video generation",
+                ),
+                comfy_io.Combo.Input(
+                    "quality",
+                    options=[resolution.value for resolution in PixverseQuality],
+                    default=PixverseQuality.res_540p,
+                ),
+                comfy_io.Combo.Input(
+                    "duration_seconds",
+                    options=[dur.value for dur in PixverseDuration],
+                ),
+                comfy_io.Combo.Input(
+                    "motion_mode",
+                    options=[mode.value for mode in PixverseMotionMode],
+                ),
+                comfy_io.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=2147483647,
+                    control_after_generate=True,
+                    tooltip="Seed for video generation.",
+                ),
+                comfy_io.String.Input(
+                    "negative_prompt",
+                    default="",
+                    multiline=True,
+                    tooltip="An optional text description of undesired elements on an image.",
+                    optional=True,
+                ),
+                comfy_io.Custom(PixverseIO.TEMPLATE).Input(
+                    "pixverse_template",
+                    tooltip="An optional template to influence style of generation, created by the PixVerse Template node.",
+                    optional=True,
+                ),
+            ],
+            outputs=[comfy_io.Video.Output()],
+            hidden=[
+                comfy_io.Hidden.auth_token_comfy_org,
+                comfy_io.Hidden.api_key_comfy_org,
+                comfy_io.Hidden.unique_id,
+            ],
+            is_api_node=True,
+        )

     @classmethod
-    def INPUT_TYPES(s):
-        return {
-            "required": {
-                "image": (IO.IMAGE,),
-                "prompt": (
-                    IO.STRING,
-                    {
-                        "multiline": True,
-                        "default": "",
-                        "tooltip": "Prompt for the video generation",
-                    },
-                ),
-                "quality": (
-                    [resolution.value for resolution in PixverseQuality],
-                    {
-                        "default": PixverseQuality.res_540p,
-                    },
-                ),
-                "duration_seconds": ([dur.value for dur in PixverseDuration],),
-                "motion_mode": ([mode.value for mode in PixverseMotionMode],),
-                "seed": (
-                    IO.INT,
-                    {
-                        "default": 0,
-                        "min": 0,
-                        "max": 2147483647,
-                        "control_after_generate": True,
-                        "tooltip": "Seed for video generation.",
-                    },
-                ),
-            },
-            "optional": {
-                "negative_prompt": (
-                    IO.STRING,
-                    {
-                        "default": "",
-                        "forceInput": True,
-                        "tooltip": "An optional text description of undesired elements on an image.",
-                    },
-                ),
-                "pixverse_template": (
-                    PixverseIO.TEMPLATE,
-                    {
-                        "tooltip": "An optional template to influence style of generation, created by the PixVerse Template node."
-                    },
-                ),
-            },
-            "hidden": {
-                "auth_token": "AUTH_TOKEN_COMFY_ORG",
-                "comfy_api_key": "API_KEY_COMFY_ORG",
-                "unique_id": "UNIQUE_ID",
-            },
-        }
-
-    async def api_call(
-        self,
+    async def execute(
+        cls,
         image: torch.Tensor,
         prompt: str,
         quality: str,
@@ -312,11 +314,13 @@ class PixverseImageToVideoNode(ComfyNodeABC):
         seed,
         negative_prompt: str = None,
         pixverse_template: int = None,
-        unique_id: Optional[str] = None,
-        **kwargs,
-    ):
+    ) -> comfy_io.NodeOutput:
         validate_string(prompt, strip_whitespace=False)
-        img_id = await upload_image_to_pixverse(image, auth_kwargs=kwargs)
+        auth = {
+            "auth_token": cls.hidden.auth_token_comfy_org,
+            "comfy_api_key": cls.hidden.api_key_comfy_org,
+        }
+        img_id = await upload_image_to_pixverse(image, auth_kwargs=auth)

         # 1080p is limited to 5 seconds duration
         # only normal motion_mode supported for 1080p or for non-5 second duration
@@ -343,7 +347,7 @@ class PixverseImageToVideoNode(ComfyNodeABC):
                 template_id=pixverse_template,
                 seed=seed,
             ),
-            auth_kwargs=kwargs,
+            auth_kwargs=auth,
         )
         response_api = await operation.execute()

@@ -364,8 +368,8 @@ class PixverseImageToVideoNode(ComfyNodeABC):
                 PixverseStatus.deleted,
             ],
             status_extractor=lambda x: x.Resp.status,
-            auth_kwargs=kwargs,
-            node_id=unique_id,
+            auth_kwargs=auth,
+            node_id=cls.hidden.unique_id,
             result_url_extractor=get_video_url_from_response,
             estimated_duration=AVERAGE_DURATION_I2V,
         )
@@ -373,72 +377,71 @@ class PixverseImageToVideoNode(ComfyNodeABC):

         async with aiohttp.ClientSession() as session:
             async with session.get(response_poll.Resp.url) as vid_response:
-                return (VideoFromFile(BytesIO(await vid_response.content.read())),)
+                return comfy_io.NodeOutput(VideoFromFile(BytesIO(await vid_response.content.read())))

-class PixverseTransitionVideoNode(ComfyNodeABC):
+class PixverseTransitionVideoNode(comfy_io.ComfyNode):
     """
     Generates videos based on prompt and output_size.
     """

-    RETURN_TYPES = (IO.VIDEO,)
-    DESCRIPTION = cleandoc(__doc__ or "")  # Handle potential None value
-    FUNCTION = "api_call"
-    API_NODE = True
-    CATEGORY = "api node/video/PixVerse"
+    @classmethod
+    def define_schema(cls) -> comfy_io.Schema:
+        return comfy_io.Schema(
+            node_id="PixverseTransitionVideoNode",
+            display_name="PixVerse Transition Video",
+            category="api node/video/PixVerse",
+            description=cleandoc(cls.__doc__ or ""),
+            inputs=[
+                comfy_io.Image.Input("first_frame"),
+                comfy_io.Image.Input("last_frame"),
+                comfy_io.String.Input(
+                    "prompt",
+                    multiline=True,
+                    default="",
+                    tooltip="Prompt for the video generation",
+                ),
+                comfy_io.Combo.Input(
+                    "quality",
+                    options=[resolution.value for resolution in PixverseQuality],
+                    default=PixverseQuality.res_540p,
+                ),
+                comfy_io.Combo.Input(
+                    "duration_seconds",
+                    options=[dur.value for dur in PixverseDuration],
+                ),
+                comfy_io.Combo.Input(
+                    "motion_mode",
+                    options=[mode.value for mode in PixverseMotionMode],
+                ),
+                comfy_io.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=2147483647,
+                    control_after_generate=True,
+                    tooltip="Seed for video generation.",
+                ),
+                comfy_io.String.Input(
+                    "negative_prompt",
+                    default="",
+                    multiline=True,
+                    tooltip="An optional text description of undesired elements on an image.",
+                    optional=True,
+                ),
+            ],
+            outputs=[comfy_io.Video.Output()],
+            hidden=[
+                comfy_io.Hidden.auth_token_comfy_org,
+                comfy_io.Hidden.api_key_comfy_org,
+                comfy_io.Hidden.unique_id,
+            ],
+            is_api_node=True,
+        )

     @classmethod
-    def INPUT_TYPES(s):
-        return {
-            "required": {
-                "first_frame": (IO.IMAGE,),
-                "last_frame": (IO.IMAGE,),
-                "prompt": (
-                    IO.STRING,
-                    {
-                        "multiline": True,
-                        "default": "",
-                        "tooltip": "Prompt for the video generation",
-                    },
-                ),
-                "quality": (
-                    [resolution.value for resolution in PixverseQuality],
-                    {
-                        "default": PixverseQuality.res_540p,
-                    },
-                ),
-                "duration_seconds": ([dur.value for dur in PixverseDuration],),
-                "motion_mode": ([mode.value for mode in PixverseMotionMode],),
-                "seed": (
-                    IO.INT,
-                    {
-                        "default": 0,
-                        "min": 0,
-                        "max": 2147483647,
-                        "control_after_generate": True,
-                        "tooltip": "Seed for video generation.",
-                    },
-                ),
-            },
-            "optional": {
-                "negative_prompt": (
-                    IO.STRING,
-                    {
-                        "default": "",
-                        "forceInput": True,
-                        "tooltip": "An optional text description of undesired elements on an image.",
-                    },
-                ),
-            },
-            "hidden": {
-                "auth_token": "AUTH_TOKEN_COMFY_ORG",
-                "comfy_api_key": "API_KEY_COMFY_ORG",
-                "unique_id": "UNIQUE_ID",
-            },
-        }
-
-    async def api_call(
-        self,
+    async def execute(
+        cls,
         first_frame: torch.Tensor,
         last_frame: torch.Tensor,
         prompt: str,
@@ -447,12 +450,14 @@ class PixverseTransitionVideoNode(ComfyNodeABC):
         motion_mode: str,
         seed,
         negative_prompt: str = None,
-        unique_id: Optional[str] = None,
-        **kwargs,
-    ):
+    ) -> comfy_io.NodeOutput:
         validate_string(prompt, strip_whitespace=False)
-        first_frame_id = await upload_image_to_pixverse(first_frame, auth_kwargs=kwargs)
-        last_frame_id = await upload_image_to_pixverse(last_frame, auth_kwargs=kwargs)
+        auth = {
+            "auth_token": cls.hidden.auth_token_comfy_org,
+            "comfy_api_key": cls.hidden.api_key_comfy_org,
+        }
+        first_frame_id = await upload_image_to_pixverse(first_frame, auth_kwargs=auth)
+        last_frame_id = await upload_image_to_pixverse(last_frame, auth_kwargs=auth)

         # 1080p is limited to 5 seconds duration
         # only normal motion_mode supported for 1080p or for non-5 second duration
@@ -479,7 +484,7 @@ class PixverseTransitionVideoNode(ComfyNodeABC):
                 negative_prompt=negative_prompt if negative_prompt else None,
                 seed=seed,
             ),
-            auth_kwargs=kwargs,
+            auth_kwargs=auth,
         )
         response_api = await operation.execute()

@@ -500,8 +505,8 @@ class PixverseTransitionVideoNode(ComfyNodeABC):
                 PixverseStatus.deleted,
             ],
             status_extractor=lambda x: x.Resp.status,
-            auth_kwargs=kwargs,
-            node_id=unique_id,
+            auth_kwargs=auth,
+            node_id=cls.hidden.unique_id,
             result_url_extractor=get_video_url_from_response,
             estimated_duration=AVERAGE_DURATION_T2V,
         )
@@ -509,19 +514,19 @@ class PixverseTransitionVideoNode(ComfyNodeABC):

         async with aiohttp.ClientSession() as session:
             async with session.get(response_poll.Resp.url) as vid_response:
-                return (VideoFromFile(BytesIO(await vid_response.content.read())),)
+                return comfy_io.NodeOutput(VideoFromFile(BytesIO(await vid_response.content.read())))

-NODE_CLASS_MAPPINGS = {
-    "PixverseTextToVideoNode": PixverseTextToVideoNode,
-    "PixverseImageToVideoNode": PixverseImageToVideoNode,
-    "PixverseTransitionVideoNode": PixverseTransitionVideoNode,
-    "PixverseTemplateNode": PixverseTemplateNode,
-}
-
-NODE_DISPLAY_NAME_MAPPINGS = {
-    "PixverseTextToVideoNode": "PixVerse Text to Video",
-    "PixverseImageToVideoNode": "PixVerse Image to Video",
-    "PixverseTransitionVideoNode": "PixVerse Transition Video",
-    "PixverseTemplateNode": "PixVerse Template",
-}
+class PixVerseExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[comfy_io.ComfyNode]]:
+        return [
+            PixverseTextToVideoNode,
+            PixverseImageToVideoNode,
+            PixverseTransitionVideoNode,
+            PixverseTemplateNode,
+        ]
+
+
+async def comfy_entrypoint() -> PixVerseExtension:
+    return PixVerseExtension()
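All four PixVerse nodes above follow the same V1-to-V3 conversion that recurs through the rest of this commit: the INPUT_TYPES/RETURN_TYPES/FUNCTION class attributes become a declarative define_schema, the instance method becomes a classmethod execute returning NodeOutput, and the module-level mapping dicts become a ComfyExtension with an entrypoint. A minimal sketch of the pattern (the ExampleNode name and its echo behavior are invented for illustration; the io calls mirror the ones used in the diff):

from typing_extensions import override
from comfy_api.latest import ComfyExtension, io


class ExampleNode(io.ComfyNode):
    @classmethod
    def define_schema(cls) -> io.Schema:
        # Replaces the V1 INPUT_TYPES / RETURN_TYPES / FUNCTION / CATEGORY attributes.
        return io.Schema(
            node_id="ExampleNode",
            category="examples",
            inputs=[io.String.Input("text", multiline=True, default="")],
            outputs=[io.String.Output()],
        )

    @classmethod
    def execute(cls, text: str) -> io.NodeOutput:
        # V1 nodes returned bare tuples; V3 nodes wrap results in NodeOutput.
        return io.NodeOutput(text)


class ExampleExtension(ComfyExtension):
    @override
    async def get_node_list(self) -> list[type[io.ComfyNode]]:
        # Replaces the NODE_CLASS_MAPPINGS / NODE_DISPLAY_NAME_MAPPINGS dicts.
        return [ExampleNode]


async def comfy_entrypoint() -> ExampleExtension:
    return ExampleExtension()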
@@ -38,48 +38,48 @@ from PIL import UnidentifiedImageError


 async def handle_recraft_file_request(
     image: torch.Tensor,
     path: str,
     mask: torch.Tensor=None,
     total_pixels=4096*4096,
     timeout=1024,
     request=None,
     auth_kwargs: dict[str,str] = None,
 ) -> list[BytesIO]:
     """
     Handle sending common Recraft file-only request to get back file bytes.
     """
     if request is None:
         request = EmptyRequest()

     files = {
         'image': tensor_to_bytesio(image, total_pixels=total_pixels).read()
     }
     if mask is not None:
         files['mask'] = tensor_to_bytesio(mask, total_pixels=total_pixels).read()

     operation = SynchronousOperation(
         endpoint=ApiEndpoint(
             path=path,
             method=HttpMethod.POST,
             request_model=type(request),
             response_model=RecraftImageGenerationResponse,
         ),
         request=request,
         files=files,
         content_type="multipart/form-data",
         auth_kwargs=auth_kwargs,
         multipart_parser=recraft_multipart_parser,
     )
     response: RecraftImageGenerationResponse = await operation.execute()
     all_bytesio = []
     if response.image is not None:
         all_bytesio.append(await download_url_to_bytesio(response.image.url, timeout=timeout))
     else:
         for data in response.data:
             all_bytesio.append(await download_url_to_bytesio(data.url, timeout=timeout))

     return all_bytesio


 def recraft_multipart_parser(data, parent_key=None, formatter: callable=None, converted_to_check: list[list]=None, is_list=False) -> dict:
@@ -107,7 +107,7 @@ def recraft_multipart_parser(data, parent_key=None, formatter: callable=None, co
         # if a list already exists, just extend it with data
         for check_list in lists_to_check:
             for conv_tuple in check_list:
-                if conv_tuple[0] == parent_key and type(conv_tuple[1]) is list:
+                if conv_tuple[0] == parent_key and isinstance(conv_tuple[1], list):
                     conv_tuple[1].append(formatter(data))
                     return True
         return False
@@ -119,7 +119,7 @@ def recraft_multipart_parser(data, parent_key=None, formatter: callable=None, co
     if formatter is None:
         formatter = lambda v: v  # Multipart representation of value

-    if type(data) is not dict:
+    if not isinstance(data, dict):
         # if a list already exists, just extend it with data
         added = handle_converted_lists(data, parent_key, converted_to_check)
         if added:
@@ -136,9 +136,9 @@ def recraft_multipart_parser(data, parent_key=None, formatter: callable=None, co

     for key, value in data.items():
         current_key = key if parent_key is None else f"{parent_key}[{key}]"
-        if type(value) is dict:
+        if isinstance(value, dict):
             converted.extend(recraft_multipart_parser(value, current_key, formatter, next_check).items())
-        elif type(value) is list:
+        elif isinstance(value, list):
             for ind, list_value in enumerate(value):
                 iter_key = f"{current_key}[]"
                 converted.extend(recraft_multipart_parser(list_value, iter_key, formatter, next_check, is_list=True).items())
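The type(x) is ... to isinstance(...) changes above are more than style: isinstance also accepts subclasses, so subclassed containers keep flowing through the parser instead of falling through to the scalar branch. A quick standard-library illustration (values invented):

from collections import OrderedDict

d = OrderedDict(style="vector_illustration")
print(type(d) is dict)      # False: OrderedDict is a subclass of dict, not dict itself
print(isinstance(d, dict))  # True: the parser now recurses into it as intended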
(File diff suppressed because it is too large.)
@@ -360,7 +360,7 @@ class RecordAudio:
     def load(self, audio):
         audio_path = folder_paths.get_annotated_filepath(audio)

-        waveform, sample_rate = torchaudio.load(audio_path)
+        waveform, sample_rate = load(audio_path)
         audio = {"waveform": waveform.unsqueeze(0), "sample_rate": sample_rate}
         return (audio, )

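The load() swap above still produces the AUDIO dict convention used throughout these nodes: a (batch, channels, samples) waveform tensor plus its sample rate. A hand-built example (a synthetic sine wave standing in for a recorded file):

import torch

sample_rate = 44100
t = torch.linspace(0, 1, sample_rate)
waveform = torch.sin(2 * torch.pi * 440 * t).unsqueeze(0)                # (channels, samples)
audio = {"waveform": waveform.unsqueeze(0), "sample_rate": sample_rate}  # add batch dim
print(audio["waveform"].shape)  # torch.Size([1, 1, 44100])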
@@ -1,44 +1,62 @@
 import folder_paths
 import comfy.audio_encoders.audio_encoders
 import comfy.utils
+from typing_extensions import override
+from comfy_api.latest import ComfyExtension, io


-class AudioEncoderLoader:
+class AudioEncoderLoader(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": { "audio_encoder_name": (folder_paths.get_filename_list("audio_encoders"), ),
-                             }}
-    RETURN_TYPES = ("AUDIO_ENCODER",)
-    FUNCTION = "load_model"
-
-    CATEGORY = "loaders"
-
-    def load_model(self, audio_encoder_name):
+    def define_schema(cls) -> io.Schema:
+        return io.Schema(
+            node_id="AudioEncoderLoader",
+            category="loaders",
+            inputs=[
+                io.Combo.Input(
+                    "audio_encoder_name",
+                    options=folder_paths.get_filename_list("audio_encoders"),
+                ),
+            ],
+            outputs=[io.AudioEncoder.Output()],
+        )
+
+    @classmethod
+    def execute(cls, audio_encoder_name) -> io.NodeOutput:
         audio_encoder_name = folder_paths.get_full_path_or_raise("audio_encoders", audio_encoder_name)
         sd = comfy.utils.load_torch_file(audio_encoder_name, safe_load=True)
         audio_encoder = comfy.audio_encoders.audio_encoders.load_audio_encoder_from_sd(sd)
         if audio_encoder is None:
             raise RuntimeError("ERROR: audio encoder file is invalid and does not contain a valid model.")
-        return (audio_encoder,)
+        return io.NodeOutput(audio_encoder)


-class AudioEncoderEncode:
+class AudioEncoderEncode(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": { "audio_encoder": ("AUDIO_ENCODER",),
-                              "audio": ("AUDIO",),
-                             }}
-    RETURN_TYPES = ("AUDIO_ENCODER_OUTPUT",)
-    FUNCTION = "encode"
-
-    CATEGORY = "conditioning"
-
-    def encode(self, audio_encoder, audio):
+    def define_schema(cls) -> io.Schema:
+        return io.Schema(
+            node_id="AudioEncoderEncode",
+            category="conditioning",
+            inputs=[
+                io.AudioEncoder.Input("audio_encoder"),
+                io.Audio.Input("audio"),
+            ],
+            outputs=[io.AudioEncoderOutput.Output()],
+        )
+
+    @classmethod
+    def execute(cls, audio_encoder, audio) -> io.NodeOutput:
         output = audio_encoder.encode_audio(audio["waveform"], audio["sample_rate"])
-        return (output,)
+        return io.NodeOutput(output)


-NODE_CLASS_MAPPINGS = {
-    "AudioEncoderLoader": AudioEncoderLoader,
-    "AudioEncoderEncode": AudioEncoderEncode,
-}
+class AudioEncoder(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            AudioEncoderLoader,
+            AudioEncoderEncode,
+        ]
+
+
+async def comfy_entrypoint() -> AudioEncoder:
+    return AudioEncoder()
@@ -1,34 +1,41 @@
 # code adapted from https://github.com/exx8/differential-diffusion

+from typing_extensions import override
+
 import torch
+from comfy_api.latest import ComfyExtension, io

-class DifferentialDiffusion():
+
+class DifferentialDiffusion(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {
-            "required": {
-                "model": ("MODEL", ),
-            },
-            "optional": {
-                "strength": ("FLOAT", {
-                    "default": 1.0,
-                    "min": 0.0,
-                    "max": 1.0,
-                    "step": 0.01,
-                }),
-            }
-        }
-    RETURN_TYPES = ("MODEL",)
-    FUNCTION = "apply"
-    CATEGORY = "_for_testing"
-    INIT = False
+    def define_schema(cls):
+        return io.Schema(
+            node_id="DifferentialDiffusion",
+            display_name="Differential Diffusion",
+            category="_for_testing",
+            inputs=[
+                io.Model.Input("model"),
+                io.Float.Input(
+                    "strength",
+                    default=1.0,
+                    min=0.0,
+                    max=1.0,
+                    step=0.01,
+                    optional=True,
+                ),
+            ],
+            outputs=[io.Model.Output()],
+            is_experimental=True,
+        )

-    def apply(self, model, strength=1.0):
+    @classmethod
+    def execute(cls, model, strength=1.0) -> io.NodeOutput:
         model = model.clone()
-        model.set_model_denoise_mask_function(lambda *args, **kwargs: self.forward(*args, **kwargs, strength=strength))
-        return (model, )
+        model.set_model_denoise_mask_function(lambda *args, **kwargs: cls.forward(*args, **kwargs, strength=strength))
+        return io.NodeOutput(model)

-    def forward(self, sigma: torch.Tensor, denoise_mask: torch.Tensor, extra_options: dict, strength: float):
+    @classmethod
+    def forward(cls, sigma: torch.Tensor, denoise_mask: torch.Tensor, extra_options: dict, strength: float):
         model = extra_options["model"]
         step_sigmas = extra_options["sigmas"]
         sigma_to = model.inner_model.model_sampling.sigma_min
@@ -53,9 +60,13 @@ class DifferentialDiffusion():
         return binary_mask


-NODE_CLASS_MAPPINGS = {
-    "DifferentialDiffusion": DifferentialDiffusion,
-}
-NODE_DISPLAY_NAME_MAPPINGS = {
-    "DifferentialDiffusion": "Differential Diffusion",
-}
+class DifferentialDiffusionExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            DifferentialDiffusion,
+        ]
+
+
+async def comfy_entrypoint() -> DifferentialDiffusionExtension:
+    return DifferentialDiffusionExtension()
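The forward body is mostly elided by the hunk, but the idea behind the denoise-mask patch is visible in what remains: the soft per-pixel mask is compared against a threshold derived from the current position in the sigma schedule, so higher-valued regions unlock earlier and change more. A toy illustration of that thresholding (mask values and thresholds invented; the real threshold comes from step_sigmas):

import torch

denoise_mask = torch.tensor([0.1, 0.5, 0.9])  # per-pixel change strength
for threshold in (0.8, 0.4, 0.05):            # stands in for schedule progress
    binary_mask = (denoise_mask >= threshold).float()
    print(threshold, binary_mask.tolist())
# 0.8  -> [0.0, 0.0, 1.0]   only the strongest region changes at first
# 0.4  -> [0.0, 1.0, 1.0]
# 0.05 -> [1.0, 1.0, 1.0]   eventually every masked pixel participates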
@@ -1,26 +1,38 @@
 import node_helpers
+from typing_extensions import override
+from comfy_api.latest import ComfyExtension, io


-class ReferenceLatent:
+class ReferenceLatent(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {"conditioning": ("CONDITIONING", ),
-                             },
-                "optional": {"latent": ("LATENT", ),}
-                }
-
-    RETURN_TYPES = ("CONDITIONING",)
-    FUNCTION = "append"
-
-    CATEGORY = "advanced/conditioning/edit_models"
-    DESCRIPTION = "This node sets the guiding latent for an edit model. If the model supports it you can chain multiple to set multiple reference images."
-
-    def append(self, conditioning, latent=None):
+    def define_schema(cls):
+        return io.Schema(
+            node_id="ReferenceLatent",
+            category="advanced/conditioning/edit_models",
+            description="This node sets the guiding latent for an edit model. If the model supports it you can chain multiple to set multiple reference images.",
+            inputs=[
+                io.Conditioning.Input("conditioning"),
+                io.Latent.Input("latent", optional=True),
+            ],
+            outputs=[
+                io.Conditioning.Output(),
+            ]
+        )
+
+    @classmethod
+    def execute(cls, conditioning, latent=None) -> io.NodeOutput:
         if latent is not None:
             conditioning = node_helpers.conditioning_set_values(conditioning, {"reference_latents": [latent["samples"]]}, append=True)
-        return (conditioning, )
+        return io.NodeOutput(conditioning)


-NODE_CLASS_MAPPINGS = {
-    "ReferenceLatent": ReferenceLatent,
-}
+class EditModelExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            ReferenceLatent,
+        ]
+
+
+def comfy_entrypoint() -> EditModelExtension:
+    return EditModelExtension()
comfy_extras/nodes_eps.py (new file, +74 lines)
@@ -0,0 +1,74 @@
+from typing_extensions import override
+
+from comfy_api.latest import ComfyExtension, io
+
+
+class EpsilonScaling(io.ComfyNode):
+    """
+    Implements the Epsilon Scaling method from 'Elucidating the Exposure Bias in Diffusion Models'
+    (https://arxiv.org/abs/2308.15321v6).
+
+    This method mitigates exposure bias by scaling the predicted noise during sampling,
+    which can significantly improve sample quality. This implementation uses the "uniform schedule"
+    recommended by the paper for its practicality and effectiveness.
+    """
+    @classmethod
+    def define_schema(cls):
+        return io.Schema(
+            node_id="Epsilon Scaling",
+            category="model_patches/unet",
+            inputs=[
+                io.Model.Input("model"),
+                io.Float.Input(
+                    "scaling_factor",
+                    default=1.005,
+                    min=0.5,
+                    max=1.5,
+                    step=0.001,
+                    display_mode=io.NumberDisplay.number,
+                ),
+            ],
+            outputs=[
+                io.Model.Output(),
+            ],
+        )
+
+    @classmethod
+    def execute(cls, model, scaling_factor) -> io.NodeOutput:
+        # Prevent division by zero, though the UI's min value should prevent this.
+        if scaling_factor == 0:
+            scaling_factor = 1e-9
+
+        def epsilon_scaling_function(args):
+            """
+            This function is applied after the CFG guidance has been calculated.
+            It recalculates the denoised latent by scaling the predicted noise.
+            """
+            denoised = args["denoised"]
+            x = args["input"]
+
+            noise_pred = x - denoised
+
+            scaled_noise_pred = noise_pred / scaling_factor
+
+            new_denoised = x - scaled_noise_pred
+
+            return new_denoised
+
+        # Clone the model patcher to avoid modifying the original model in place
+        model_clone = model.clone()
+
+        model_clone.set_model_sampler_post_cfg_function(epsilon_scaling_function)
+
+        return io.NodeOutput(model_clone)
+
+
+class EpsilonScalingExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            EpsilonScaling,
+        ]
+
+async def comfy_entrypoint() -> EpsilonScalingExtension:
+    return EpsilonScalingExtension()
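The arithmetic inside epsilon_scaling_function is easy to verify by hand: with a scaling_factor just above 1, slightly less noise is subtracted at each step, nudging the prediction toward the current sample. A standalone check (tensor values invented for illustration):

import torch

x = torch.tensor([1.0, 2.0])         # noisy latent handed to the sampler
denoised = torch.tensor([0.4, 1.6])  # model's denoised prediction
s = 1.005                            # scaling_factor

noise_pred = x - denoised            # implied noise: [0.6, 0.4]
new_denoised = x - noise_pred / s    # [0.4030, 1.6020] (approx)
print(new_denoised)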
@@ -1,6 +1,8 @@
 # from https://github.com/zju-pi/diff-sampler/tree/main/gits-main
 import numpy as np
 import torch
+from typing_extensions import override
+from comfy_api.latest import ComfyExtension, io

 def loglinear_interp(t_steps, num_steps):
     """
@@ -333,25 +335,28 @@ NOISE_LEVELS = {
     ],
 }

-class GITSScheduler:
+class GITSScheduler(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required":
-                    {"coeff": ("FLOAT", {"default": 1.20, "min": 0.80, "max": 1.50, "step": 0.05}),
-                     "steps": ("INT", {"default": 10, "min": 2, "max": 1000}),
-                     "denoise": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}),
-                     }
-                }
-    RETURN_TYPES = ("SIGMAS",)
-    CATEGORY = "sampling/custom_sampling/schedulers"
-
-    FUNCTION = "get_sigmas"
-
-    def get_sigmas(self, coeff, steps, denoise):
+    def define_schema(cls):
+        return io.Schema(
+            node_id="GITSScheduler",
+            category="sampling/custom_sampling/schedulers",
+            inputs=[
+                io.Float.Input("coeff", default=1.20, min=0.80, max=1.50, step=0.05),
+                io.Int.Input("steps", default=10, min=2, max=1000),
+                io.Float.Input("denoise", default=1.0, min=0.0, max=1.0, step=0.01),
+            ],
+            outputs=[
+                io.Sigmas.Output(),
+            ],
+        )
+
+    @classmethod
+    def execute(cls, coeff, steps, denoise):
         total_steps = steps
         if denoise < 1.0:
             if denoise <= 0.0:
-                return (torch.FloatTensor([]),)
+                return io.NodeOutput(torch.FloatTensor([]))
             total_steps = round(steps * denoise)

         if steps <= 20:
@@ -362,8 +367,16 @@ class GITSScheduler:

         sigmas = sigmas[-(total_steps + 1):]
         sigmas[-1] = 0
-        return (torch.FloatTensor(sigmas), )
+        return io.NodeOutput(torch.FloatTensor(sigmas))

-NODE_CLASS_MAPPINGS = {
-    "GITSScheduler": GITSScheduler,
-}
+
+class GITSSchedulerExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            GITSScheduler,
+        ]
+
+
+async def comfy_entrypoint() -> GITSSchedulerExtension:
+    return GITSSchedulerExtension()
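The denoise handling in execute keeps only the tail of the schedule: total_steps = round(steps * denoise), then the last total_steps + 1 sigmas are taken and the final one pinned to zero. A quick check with invented sigma values:

import torch

sigmas = [14.6, 8.0, 4.2, 2.1, 1.0, 0.4, 0.1]  # hypothetical 6-step schedule
steps, denoise = 6, 0.5
total_steps = round(steps * denoise)           # 3

tail = sigmas[-(total_steps + 1):]             # [2.1, 1.0, 0.4, 0.1]
tail[-1] = 0                                   # final sigma pinned to zero
print(torch.FloatTensor(tail))                 # tensor([2.1000, 1.0000, 0.4000, 0.0000])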
@@ -1,21 +1,30 @@
 import torch

-class InstructPixToPixConditioning:
+from typing_extensions import override
+from comfy_api.latest import ComfyExtension, io
+
+
+class InstructPixToPixConditioning(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {"positive": ("CONDITIONING", ),
-                             "negative": ("CONDITIONING", ),
-                             "vae": ("VAE", ),
-                             "pixels": ("IMAGE", ),
-                             }}
-
-    RETURN_TYPES = ("CONDITIONING","CONDITIONING","LATENT")
-    RETURN_NAMES = ("positive", "negative", "latent")
-    FUNCTION = "encode"
-
-    CATEGORY = "conditioning/instructpix2pix"
-
-    def encode(self, positive, negative, pixels, vae):
+    def define_schema(cls):
+        return io.Schema(
+            node_id="InstructPixToPixConditioning",
+            category="conditioning/instructpix2pix",
+            inputs=[
+                io.Conditioning.Input("positive"),
+                io.Conditioning.Input("negative"),
+                io.Vae.Input("vae"),
+                io.Image.Input("pixels"),
+            ],
+            outputs=[
+                io.Conditioning.Output(display_name="positive"),
+                io.Conditioning.Output(display_name="negative"),
+                io.Latent.Output(display_name="latent"),
+            ],
+        )
+
+    @classmethod
+    def execute(cls, positive, negative, pixels, vae) -> io.NodeOutput:
         x = (pixels.shape[1] // 8) * 8
         y = (pixels.shape[2] // 8) * 8

@@ -38,8 +47,17 @@ class InstructPixToPixConditioning:
             n = [t[0], d]
             c.append(n)
         out.append(c)
-        return (out[0], out[1], out_latent)
+        return io.NodeOutput(out[0], out[1], out_latent)


-NODE_CLASS_MAPPINGS = {
-    "InstructPixToPixConditioning": InstructPixToPixConditioning,
-}
+class InstructPix2PixExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            InstructPixToPixConditioning,
+        ]
+
+
+async def comfy_entrypoint() -> InstructPix2PixExtension:
+    return InstructPix2PixExtension()
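The execute body above (largely elided by the hunk) starts by snapping the input image to dimensions the VAE can encode, i.e. the largest multiples of 8 that fit. The rounding in isolation (sizes invented):

pixels_height, pixels_width = 517, 771  # hypothetical pixels.shape[1], pixels.shape[2]
x = (pixels_height // 8) * 8            # 512
y = (pixels_width // 8) * 8             # 768
print(x, y)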
@@ -1,4 +1,3 @@
-import io
 import nodes
 import node_helpers
 import torch
@@ -8,46 +7,60 @@ import comfy.utils
 import math
 import numpy as np
 import av
+from io import BytesIO
+from typing_extensions import override
 from comfy.ldm.lightricks.symmetric_patchifier import SymmetricPatchifier, latent_to_pixel_coords
+from comfy_api.latest import ComfyExtension, io

-class EmptyLTXVLatentVideo:
+class EmptyLTXVLatentVideo(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": { "width": ("INT", {"default": 768, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}),
-                              "height": ("INT", {"default": 512, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}),
-                              "length": ("INT", {"default": 97, "min": 1, "max": nodes.MAX_RESOLUTION, "step": 8}),
-                              "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096})}}
-    RETURN_TYPES = ("LATENT",)
-    FUNCTION = "generate"
-
-    CATEGORY = "latent/video/ltxv"
-
-    def generate(self, width, height, length, batch_size=1):
+    def define_schema(cls):
+        return io.Schema(
+            node_id="EmptyLTXVLatentVideo",
+            category="latent/video/ltxv",
+            inputs=[
+                io.Int.Input("width", default=768, min=64, max=nodes.MAX_RESOLUTION, step=32),
+                io.Int.Input("height", default=512, min=64, max=nodes.MAX_RESOLUTION, step=32),
+                io.Int.Input("length", default=97, min=1, max=nodes.MAX_RESOLUTION, step=8),
+                io.Int.Input("batch_size", default=1, min=1, max=4096),
+            ],
+            outputs=[
+                io.Latent.Output(),
+            ],
+        )
+
+    @classmethod
+    def execute(cls, width, height, length, batch_size=1) -> io.NodeOutput:
         latent = torch.zeros([batch_size, 128, ((length - 1) // 8) + 1, height // 32, width // 32], device=comfy.model_management.intermediate_device())
-        return ({"samples": latent}, )
+        return io.NodeOutput({"samples": latent})


-class LTXVImgToVideo:
+class LTXVImgToVideo(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {"positive": ("CONDITIONING", ),
-                             "negative": ("CONDITIONING", ),
-                             "vae": ("VAE",),
-                             "image": ("IMAGE",),
-                             "width": ("INT", {"default": 768, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}),
-                             "height": ("INT", {"default": 512, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}),
-                             "length": ("INT", {"default": 97, "min": 9, "max": nodes.MAX_RESOLUTION, "step": 8}),
-                             "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
-                             "strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0}),
-                             }}
-
-    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
-    RETURN_NAMES = ("positive", "negative", "latent")
-
-    CATEGORY = "conditioning/video_models"
-    FUNCTION = "generate"
-
-    def generate(self, positive, negative, image, vae, width, height, length, batch_size, strength):
+    def define_schema(cls):
+        return io.Schema(
+            node_id="LTXVImgToVideo",
+            category="conditioning/video_models",
+            inputs=[
+                io.Conditioning.Input("positive"),
+                io.Conditioning.Input("negative"),
+                io.Vae.Input("vae"),
+                io.Image.Input("image"),
+                io.Int.Input("width", default=768, min=64, max=nodes.MAX_RESOLUTION, step=32),
+                io.Int.Input("height", default=512, min=64, max=nodes.MAX_RESOLUTION, step=32),
+                io.Int.Input("length", default=97, min=9, max=nodes.MAX_RESOLUTION, step=8),
+                io.Int.Input("batch_size", default=1, min=1, max=4096),
+                io.Float.Input("strength", default=1.0, min=0.0, max=1.0),
+            ],
+            outputs=[
+                io.Conditioning.Output(display_name="positive"),
+                io.Conditioning.Output(display_name="negative"),
+                io.Latent.Output(display_name="latent"),
+            ],
+        )
+
+    @classmethod
+    def execute(cls, positive, negative, image, vae, width, height, length, batch_size, strength) -> io.NodeOutput:
         pixels = comfy.utils.common_upscale(image.movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1)
         encode_pixels = pixels[:, :, :, :3]
         t = vae.encode(encode_pixels)
@@ -62,7 +75,7 @@ class LTXVImgToVideo:
         )
         conditioning_latent_frames_mask[:, :, :t.shape[2]] = 1.0 - strength

-        return (positive, negative, {"samples": latent, "noise_mask": conditioning_latent_frames_mask}, )
+        return io.NodeOutput(positive, negative, {"samples": latent, "noise_mask": conditioning_latent_frames_mask})


 def conditioning_get_any_value(conditioning, key, default=None):
@@ -93,35 +106,46 @@ def get_keyframe_idxs(cond):
     num_keyframes = torch.unique(keyframe_idxs[:, 0]).shape[0]
     return keyframe_idxs, num_keyframes

-class LTXVAddGuide:
+class LTXVAddGuide(io.ComfyNode):
+    NUM_PREFIX_FRAMES = 2
+    PATCHIFIER = SymmetricPatchifier(1)
+
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {"positive": ("CONDITIONING", ),
-                             "negative": ("CONDITIONING", ),
-                             "vae": ("VAE",),
-                             "latent": ("LATENT",),
-                             "image": ("IMAGE", {"tooltip": "Image or video to condition the latent video on. Must be 8*n + 1 frames."
-                                                 "If the video is not 8*n + 1 frames, it will be cropped to the nearest 8*n + 1 frames."}),
-                             "frame_idx": ("INT", {"default": 0, "min": -9999, "max": 9999,
-                                                   "tooltip": "Frame index to start the conditioning at. For single-frame images or "
-                                                              "videos with 1-8 frames, any frame_idx value is acceptable. For videos with 9+ "
-                                                              "frames, frame_idx must be divisible by 8, otherwise it will be rounded down to "
-                                                              "the nearest multiple of 8. Negative values are counted from the end of the video."}),
-                             "strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}),
-                             }
-                }
+    def define_schema(cls):
+        return io.Schema(
+            node_id="LTXVAddGuide",
+            category="conditioning/video_models",
+            inputs=[
+                io.Conditioning.Input("positive"),
+                io.Conditioning.Input("negative"),
+                io.Vae.Input("vae"),
+                io.Latent.Input("latent"),
+                io.Image.Input(
+                    "image",
+                    tooltip="Image or video to condition the latent video on. Must be 8*n + 1 frames. "
+                            "If the video is not 8*n + 1 frames, it will be cropped to the nearest 8*n + 1 frames.",
+                ),
+                io.Int.Input(
+                    "frame_idx",
+                    default=0,
+                    min=-9999,
+                    max=9999,
+                    tooltip="Frame index to start the conditioning at. "
+                            "For single-frame images or videos with 1-8 frames, any frame_idx value is acceptable. "
+                            "For videos with 9+ frames, frame_idx must be divisible by 8, otherwise it will be rounded "
+                            "down to the nearest multiple of 8. Negative values are counted from the end of the video.",
+                ),
+                io.Float.Input("strength", default=1.0, min=0.0, max=1.0, step=0.01),
+            ],
+            outputs=[
+                io.Conditioning.Output(display_name="positive"),
+                io.Conditioning.Output(display_name="negative"),
+                io.Latent.Output(display_name="latent"),
+            ],
+        )

-    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
-    RETURN_NAMES = ("positive", "negative", "latent")
-
-    CATEGORY = "conditioning/video_models"
-    FUNCTION = "generate"
-
-    def __init__(self):
-        self._num_prefix_frames = 2
-        self._patchifier = SymmetricPatchifier(1)
-
-    def encode(self, vae, latent_width, latent_height, images, scale_factors):
+    @classmethod
+    def encode(cls, vae, latent_width, latent_height, images, scale_factors):
         time_scale_factor, width_scale_factor, height_scale_factor = scale_factors
         images = images[:(images.shape[0] - 1) // time_scale_factor * time_scale_factor + 1]
         pixels = comfy.utils.common_upscale(images.movedim(-1, 1), latent_width * width_scale_factor, latent_height * height_scale_factor, "bilinear", crop="disabled").movedim(1, -1)
@@ -129,7 +153,8 @@ class LTXVAddGuide:
         t = vae.encode(encode_pixels)
         return encode_pixels, t

-    def get_latent_index(self, cond, latent_length, guide_length, frame_idx, scale_factors):
+    @classmethod
+    def get_latent_index(cls, cond, latent_length, guide_length, frame_idx, scale_factors):
         time_scale_factor, _, _ = scale_factors
         _, num_keyframes = get_keyframe_idxs(cond)
         latent_count = latent_length - num_keyframes
@@ -141,9 +166,10 @@ class LTXVAddGuide:

         return frame_idx, latent_idx

-    def add_keyframe_index(self, cond, frame_idx, guiding_latent, scale_factors):
+    @classmethod
+    def add_keyframe_index(cls, cond, frame_idx, guiding_latent, scale_factors):
         keyframe_idxs, _ = get_keyframe_idxs(cond)
-        _, latent_coords = self._patchifier.patchify(guiding_latent)
+        _, latent_coords = cls.PATCHIFIER.patchify(guiding_latent)
         pixel_coords = latent_to_pixel_coords(latent_coords, scale_factors, causal_fix=frame_idx == 0)  # we need the causal fix only if we're placing the new latents at index 0
         pixel_coords[:, 0] += frame_idx
         if keyframe_idxs is None:
@@ -152,8 +178,9 @@ class LTXVAddGuide:
             keyframe_idxs = torch.cat([keyframe_idxs, pixel_coords], dim=2)
         return node_helpers.conditioning_set_values(cond, {"keyframe_idxs": keyframe_idxs})

-    def append_keyframe(self, positive, negative, frame_idx, latent_image, noise_mask, guiding_latent, strength, scale_factors):
-        _, latent_idx = self.get_latent_index(
+    @classmethod
+    def append_keyframe(cls, positive, negative, frame_idx, latent_image, noise_mask, guiding_latent, strength, scale_factors):
+        _, latent_idx = cls.get_latent_index(
             cond=positive,
             latent_length=latent_image.shape[2],
             guide_length=guiding_latent.shape[2],
@@ -162,8 +189,8 @@ class LTXVAddGuide:
         )
         noise_mask[:, :, latent_idx:latent_idx + guiding_latent.shape[2]] = 1.0

-        positive = self.add_keyframe_index(positive, frame_idx, guiding_latent, scale_factors)
-        negative = self.add_keyframe_index(negative, frame_idx, guiding_latent, scale_factors)
+        positive = cls.add_keyframe_index(positive, frame_idx, guiding_latent, scale_factors)
+        negative = cls.add_keyframe_index(negative, frame_idx, guiding_latent, scale_factors)

         mask = torch.full(
             (noise_mask.shape[0], 1, guiding_latent.shape[2], noise_mask.shape[3], noise_mask.shape[4]),
@@ -176,7 +203,8 @@ class LTXVAddGuide:
         noise_mask = torch.cat([noise_mask, mask], dim=2)
         return positive, negative, latent_image, noise_mask

-    def replace_latent_frames(self, latent_image, noise_mask, guiding_latent, latent_idx, strength):
+    @classmethod
+    def replace_latent_frames(cls, latent_image, noise_mask, guiding_latent, latent_idx, strength):
         cond_length = guiding_latent.shape[2]
         assert latent_image.shape[2] >= latent_idx + cond_length, "Conditioning frames exceed the length of the latent sequence."

@@ -195,20 +223,21 @@ class LTXVAddGuide:

         return latent_image, noise_mask

-    def generate(self, positive, negative, vae, latent, image, frame_idx, strength):
+    @classmethod
+    def execute(cls, positive, negative, vae, latent, image, frame_idx, strength) -> io.NodeOutput:
         scale_factors = vae.downscale_index_formula
         latent_image = latent["samples"]
         noise_mask = get_noise_mask(latent)

         _, _, latent_length, latent_height, latent_width = latent_image.shape
-        image, t = self.encode(vae, latent_width, latent_height, image, scale_factors)
+        image, t = cls.encode(vae, latent_width, latent_height, image, scale_factors)

-        frame_idx, latent_idx = self.get_latent_index(positive, latent_length, len(image), frame_idx, scale_factors)
+        frame_idx, latent_idx = cls.get_latent_index(positive, latent_length, len(image), frame_idx, scale_factors)
         assert latent_idx + t.shape[2] <= latent_length, "Conditioning frames exceed the length of the latent sequence."

-        num_prefix_frames = min(self._num_prefix_frames, t.shape[2])
+        num_prefix_frames = min(cls.NUM_PREFIX_FRAMES, t.shape[2])

-        positive, negative, latent_image, noise_mask = self.append_keyframe(
+        positive, negative, latent_image, noise_mask = cls.append_keyframe(
             positive,
             negative,
             frame_idx,
@@ -223,9 +252,9 @@ class LTXVAddGuide:

         t = t[:, :, num_prefix_frames:]
         if t.shape[2] == 0:
-            return (positive, negative, {"samples": latent_image, "noise_mask": noise_mask},)
+            return io.NodeOutput(positive, negative, {"samples": latent_image, "noise_mask": noise_mask})

-        latent_image, noise_mask = self.replace_latent_frames(
+        latent_image, noise_mask = cls.replace_latent_frames(
             latent_image,
             noise_mask,
             t,
@@ -233,34 +262,35 @@ class LTXVAddGuide:
             strength,
         )

-        return (positive, negative, {"samples": latent_image, "noise_mask": noise_mask},)
+        return io.NodeOutput(positive, negative, {"samples": latent_image, "noise_mask": noise_mask})


-class LTXVCropGuides:
+class LTXVCropGuides(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {"positive": ("CONDITIONING", ),
-                             "negative": ("CONDITIONING", ),
-                             "latent": ("LATENT",),
-                             }
-                }
+    def define_schema(cls):
+        return io.Schema(
+            node_id="LTXVCropGuides",
+            category="conditioning/video_models",
+            inputs=[
+                io.Conditioning.Input("positive"),
+                io.Conditioning.Input("negative"),
+                io.Latent.Input("latent"),
+            ],
+            outputs=[
+                io.Conditioning.Output(display_name="positive"),
+                io.Conditioning.Output(display_name="negative"),
+                io.Latent.Output(display_name="latent"),
+            ],
+        )

-    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
-    RETURN_NAMES = ("positive", "negative", "latent")
-
-    CATEGORY = "conditioning/video_models"
-    FUNCTION = "crop"
-
-    def __init__(self):
-        self._patchifier = SymmetricPatchifier(1)
-
-    def crop(self, positive, negative, latent):
+    @classmethod
+    def execute(cls, positive, negative, latent) -> io.NodeOutput:
         latent_image = latent["samples"].clone()
         noise_mask = get_noise_mask(latent)

         _, num_keyframes = get_keyframe_idxs(positive)
         if num_keyframes == 0:
-            return (positive, negative, {"samples": latent_image, "noise_mask": noise_mask},)
+            return io.NodeOutput(positive, negative, {"samples": latent_image, "noise_mask": noise_mask},)
|
||||||
|
|
||||||
latent_image = latent_image[:, :, :-num_keyframes]
|
latent_image = latent_image[:, :, :-num_keyframes]
|
||||||
noise_mask = noise_mask[:, :, :-num_keyframes]
|
noise_mask = noise_mask[:, :, :-num_keyframes]
|
||||||
@ -268,44 +298,52 @@ class LTXVCropGuides:
|
|||||||
positive = node_helpers.conditioning_set_values(positive, {"keyframe_idxs": None})
|
positive = node_helpers.conditioning_set_values(positive, {"keyframe_idxs": None})
|
||||||
negative = node_helpers.conditioning_set_values(negative, {"keyframe_idxs": None})
|
negative = node_helpers.conditioning_set_values(negative, {"keyframe_idxs": None})
|
||||||
|
|
||||||
return (positive, negative, {"samples": latent_image, "noise_mask": noise_mask},)
|
return io.NodeOutput(positive, negative, {"samples": latent_image, "noise_mask": noise_mask})
|
||||||
|
|
||||||
|
|
||||||
class LTXVConditioning:
|
class LTXVConditioning(io.ComfyNode):
|
||||||
@classmethod
|
@classmethod
|
||||||
def INPUT_TYPES(s):
|
def define_schema(cls):
|
||||||
return {"required": {"positive": ("CONDITIONING", ),
|
return io.Schema(
|
||||||
"negative": ("CONDITIONING", ),
|
node_id="LTXVConditioning",
|
||||||
"frame_rate": ("FLOAT", {"default": 25.0, "min": 0.0, "max": 1000.0, "step": 0.01}),
|
category="conditioning/video_models",
|
||||||
}}
|
inputs=[
|
||||||
RETURN_TYPES = ("CONDITIONING", "CONDITIONING")
|
io.Conditioning.Input("positive"),
|
||||||
RETURN_NAMES = ("positive", "negative")
|
io.Conditioning.Input("negative"),
|
||||||
FUNCTION = "append"
|
io.Float.Input("frame_rate", default=25.0, min=0.0, max=1000.0, step=0.01),
|
||||||
|
],
|
||||||
|
outputs=[
|
||||||
|
io.Conditioning.Output(display_name="positive"),
|
||||||
|
io.Conditioning.Output(display_name="negative"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
CATEGORY = "conditioning/video_models"
|
@classmethod
|
||||||
|
def execute(cls, positive, negative, frame_rate) -> io.NodeOutput:
|
||||||
def append(self, positive, negative, frame_rate):
|
|
||||||
positive = node_helpers.conditioning_set_values(positive, {"frame_rate": frame_rate})
|
positive = node_helpers.conditioning_set_values(positive, {"frame_rate": frame_rate})
|
||||||
negative = node_helpers.conditioning_set_values(negative, {"frame_rate": frame_rate})
|
negative = node_helpers.conditioning_set_values(negative, {"frame_rate": frame_rate})
|
||||||
return (positive, negative)
|
return io.NodeOutput(positive, negative)
|
||||||
|
|
||||||
|
|
||||||
class ModelSamplingLTXV:
|
class ModelSamplingLTXV(io.ComfyNode):
|
||||||
@classmethod
|
@classmethod
|
||||||
def INPUT_TYPES(s):
|
def define_schema(cls):
|
||||||
return {"required": { "model": ("MODEL",),
|
return io.Schema(
|
||||||
"max_shift": ("FLOAT", {"default": 2.05, "min": 0.0, "max": 100.0, "step":0.01}),
|
node_id="ModelSamplingLTXV",
|
||||||
"base_shift": ("FLOAT", {"default": 0.95, "min": 0.0, "max": 100.0, "step":0.01}),
|
category="advanced/model",
|
||||||
},
|
inputs=[
|
||||||
"optional": {"latent": ("LATENT",), }
|
io.Model.Input("model"),
|
||||||
}
|
io.Float.Input("max_shift", default=2.05, min=0.0, max=100.0, step=0.01),
|
||||||
|
io.Float.Input("base_shift", default=0.95, min=0.0, max=100.0, step=0.01),
|
||||||
|
io.Latent.Input("latent", optional=True),
|
||||||
|
],
|
||||||
|
outputs=[
|
||||||
|
io.Model.Output(),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
RETURN_TYPES = ("MODEL",)
|
@classmethod
|
||||||
FUNCTION = "patch"
|
def execute(cls, model, max_shift, base_shift, latent=None) -> io.NodeOutput:
|
||||||
|
|
||||||
CATEGORY = "advanced/model"
|
|
||||||
|
|
||||||
def patch(self, model, max_shift, base_shift, latent=None):
|
|
||||||
m = model.clone()
|
m = model.clone()
|
||||||
|
|
||||||
if latent is None:
|
if latent is None:
|
||||||
@ -329,37 +367,41 @@ class ModelSamplingLTXV:
|
|||||||
model_sampling.set_parameters(shift=shift)
|
model_sampling.set_parameters(shift=shift)
|
||||||
m.add_object_patch("model_sampling", model_sampling)
|
m.add_object_patch("model_sampling", model_sampling)
|
||||||
|
|
||||||
return (m, )
|
return io.NodeOutput(m)
|
||||||
|
|
||||||
|
|
||||||
class LTXVScheduler:
|
class LTXVScheduler(io.ComfyNode):
|
||||||
@classmethod
|
@classmethod
|
||||||
def INPUT_TYPES(s):
|
def define_schema(cls):
|
||||||
return {"required":
|
return io.Schema(
|
||||||
{"steps": ("INT", {"default": 20, "min": 1, "max": 10000}),
|
node_id="LTXVScheduler",
|
||||||
"max_shift": ("FLOAT", {"default": 2.05, "min": 0.0, "max": 100.0, "step":0.01}),
|
category="sampling/custom_sampling/schedulers",
|
||||||
"base_shift": ("FLOAT", {"default": 0.95, "min": 0.0, "max": 100.0, "step":0.01}),
|
inputs=[
|
||||||
"stretch": ("BOOLEAN", {
|
io.Int.Input("steps", default=20, min=1, max=10000),
|
||||||
"default": True,
|
io.Float.Input("max_shift", default=2.05, min=0.0, max=100.0, step=0.01),
|
||||||
"tooltip": "Stretch the sigmas to be in the range [terminal, 1]."
|
io.Float.Input("base_shift", default=0.95, min=0.0, max=100.0, step=0.01),
|
||||||
}),
|
io.Boolean.Input(
|
||||||
"terminal": (
|
id="stretch",
|
||||||
"FLOAT",
|
default=True,
|
||||||
{
|
tooltip="Stretch the sigmas to be in the range [terminal, 1].",
|
||||||
"default": 0.1, "min": 0.0, "max": 0.99, "step": 0.01,
|
),
|
||||||
"tooltip": "The terminal value of the sigmas after stretching."
|
io.Float.Input(
|
||||||
},
|
id="terminal",
|
||||||
),
|
default=0.1,
|
||||||
},
|
min=0.0,
|
||||||
"optional": {"latent": ("LATENT",), }
|
max=0.99,
|
||||||
}
|
step=0.01,
|
||||||
|
tooltip="The terminal value of the sigmas after stretching.",
|
||||||
|
),
|
||||||
|
io.Latent.Input("latent", optional=True),
|
||||||
|
],
|
||||||
|
outputs=[
|
||||||
|
io.Sigmas.Output(),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
RETURN_TYPES = ("SIGMAS",)
|
@classmethod
|
||||||
CATEGORY = "sampling/custom_sampling/schedulers"
|
def execute(cls, steps, max_shift, base_shift, stretch, terminal, latent=None) -> io.NodeOutput:
|
||||||
|
|
||||||
FUNCTION = "get_sigmas"
|
|
||||||
|
|
||||||
def get_sigmas(self, steps, max_shift, base_shift, stretch, terminal, latent=None):
|
|
||||||
if latent is None:
|
if latent is None:
|
||||||
tokens = 4096
|
tokens = 4096
|
||||||
else:
|
else:
|
||||||
@ -389,7 +431,7 @@ class LTXVScheduler:
|
|||||||
stretched = 1.0 - (one_minus_z / scale_factor)
|
stretched = 1.0 - (one_minus_z / scale_factor)
|
||||||
sigmas[non_zero_mask] = stretched
|
sigmas[non_zero_mask] = stretched
|
||||||
|
|
||||||
return (sigmas,)
|
return io.NodeOutput(sigmas)
|
||||||
|
|
||||||
def encode_single_frame(output_file, image_array: np.ndarray, crf):
|
def encode_single_frame(output_file, image_array: np.ndarray, crf):
|
||||||
container = av.open(output_file, "w", format="mp4")
|
container = av.open(output_file, "w", format="mp4")
|
||||||
@ -423,52 +465,54 @@ def preprocess(image: torch.Tensor, crf=29):
|
|||||||
return image
|
return image
|
||||||
|
|
||||||
image_array = (image[:(image.shape[0] // 2) * 2, :(image.shape[1] // 2) * 2] * 255.0).byte().cpu().numpy()
|
image_array = (image[:(image.shape[0] // 2) * 2, :(image.shape[1] // 2) * 2] * 255.0).byte().cpu().numpy()
|
||||||
with io.BytesIO() as output_file:
|
with BytesIO() as output_file:
|
||||||
encode_single_frame(output_file, image_array, crf)
|
encode_single_frame(output_file, image_array, crf)
|
||||||
video_bytes = output_file.getvalue()
|
video_bytes = output_file.getvalue()
|
||||||
with io.BytesIO(video_bytes) as video_file:
|
with BytesIO(video_bytes) as video_file:
|
||||||
image_array = decode_single_frame(video_file)
|
image_array = decode_single_frame(video_file)
|
||||||
tensor = torch.tensor(image_array, dtype=image.dtype, device=image.device) / 255.0
|
tensor = torch.tensor(image_array, dtype=image.dtype, device=image.device) / 255.0
|
||||||
return tensor
|
return tensor
|
||||||
|
|
||||||
|
|
||||||
class LTXVPreprocess:
|
class LTXVPreprocess(io.ComfyNode):
|
||||||
@classmethod
|
@classmethod
|
||||||
def INPUT_TYPES(s):
|
def define_schema(cls):
|
||||||
return {
|
return io.Schema(
|
||||||
"required": {
|
node_id="LTXVPreprocess",
|
||||||
"image": ("IMAGE",),
|
category="image",
|
||||||
"img_compression": (
|
inputs=[
|
||||||
"INT",
|
io.Image.Input("image"),
|
||||||
{
|
io.Int.Input(
|
||||||
"default": 35,
|
id="img_compression", default=35, min=0, max=100, tooltip="Amount of compression to apply on image."
|
||||||
"min": 0,
|
|
||||||
"max": 100,
|
|
||||||
"tooltip": "Amount of compression to apply on image.",
|
|
||||||
},
|
|
||||||
),
|
),
|
||||||
}
|
],
|
||||||
}
|
outputs=[
|
||||||
|
io.Image.Output(display_name="output_image"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
FUNCTION = "preprocess"
|
@classmethod
|
||||||
RETURN_TYPES = ("IMAGE",)
|
def execute(cls, image, img_compression) -> io.NodeOutput:
|
||||||
RETURN_NAMES = ("output_image",)
|
|
||||||
CATEGORY = "image"
|
|
||||||
|
|
||||||
def preprocess(self, image, img_compression):
|
|
||||||
output_images = []
|
output_images = []
|
||||||
for i in range(image.shape[0]):
|
for i in range(image.shape[0]):
|
||||||
output_images.append(preprocess(image[i], img_compression))
|
output_images.append(preprocess(image[i], img_compression))
|
||||||
return (torch.stack(output_images),)
|
return io.NodeOutput(torch.stack(output_images))
|
||||||
|
|
||||||
|
|
||||||
NODE_CLASS_MAPPINGS = {
|
class LtxvExtension(ComfyExtension):
|
||||||
"EmptyLTXVLatentVideo": EmptyLTXVLatentVideo,
|
@override
|
||||||
"LTXVImgToVideo": LTXVImgToVideo,
|
async def get_node_list(self) -> list[type[io.ComfyNode]]:
|
||||||
"ModelSamplingLTXV": ModelSamplingLTXV,
|
return [
|
||||||
"LTXVConditioning": LTXVConditioning,
|
EmptyLTXVLatentVideo,
|
||||||
"LTXVScheduler": LTXVScheduler,
|
LTXVImgToVideo,
|
||||||
"LTXVAddGuide": LTXVAddGuide,
|
ModelSamplingLTXV,
|
||||||
"LTXVPreprocess": LTXVPreprocess,
|
LTXVConditioning,
|
||||||
"LTXVCropGuides": LTXVCropGuides,
|
LTXVScheduler,
|
||||||
}
|
LTXVAddGuide,
|
||||||
|
LTXVPreprocess,
|
||||||
|
LTXVCropGuides,
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
async def comfy_entrypoint() -> LtxvExtension:
|
||||||
|
return LtxvExtension()
|
||||||
|
|||||||
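Every file below repeats the migration pattern just applied to the LTXV nodes: the INPUT_TYPES/RETURN_TYPES/FUNCTION class attributes become a define_schema() classmethod returning io.Schema, the entry-point method becomes a classmethod named execute returning io.NodeOutput, and the module-level NODE_CLASS_MAPPINGS dict becomes a ComfyExtension subclass plus a comfy_entrypoint() coroutine. A minimal sketch of the whole pattern, with hypothetical node and extension names (the imports are exactly the ones this commit adds to each file):

from typing_extensions import override
from comfy_api.latest import ComfyExtension, io


class MyNode(io.ComfyNode):  # hypothetical example, not part of this commit
    @classmethod
    def define_schema(cls) -> io.Schema:
        return io.Schema(
            node_id="MyNode",
            category="example",
            inputs=[io.Int.Input("value", default=0, min=0, max=10)],
            outputs=[io.Int.Output()],
        )

    @classmethod
    def execute(cls, value) -> io.NodeOutput:
        # Positional NodeOutput arguments line up with the outputs list above.
        return io.NodeOutput(value + 1)


class MyExtension(ComfyExtension):
    @override
    async def get_node_list(self) -> list[type[io.ComfyNode]]:
        return [MyNode]


async def comfy_entrypoint() -> MyExtension:  # ComfyUI calls this to load the extension
    return MyExtension()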
comfy_extras/nodes_morphology.py
@@ -1,24 +1,34 @@
 import torch
 import comfy.model_management
+from typing_extensions import override
+from comfy_api.latest import ComfyExtension, io
 
 from kornia.morphology import dilation, erosion, opening, closing, gradient, top_hat, bottom_hat
 import kornia.color
 
 
-class Morphology:
+class Morphology(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {"image": ("IMAGE",),
-                             "operation": (["erode", "dilate", "open", "close", "gradient", "bottom_hat", "top_hat"],),
-                             "kernel_size": ("INT", {"default": 3, "min": 3, "max": 999, "step": 1}),
-                             }}
-
-    RETURN_TYPES = ("IMAGE",)
-    FUNCTION = "process"
-
-    CATEGORY = "image/postprocessing"
-
-    def process(self, image, operation, kernel_size):
+    def define_schema(cls):
+        return io.Schema(
+            node_id="Morphology",
+            display_name="ImageMorphology",
+            category="image/postprocessing",
+            inputs=[
+                io.Image.Input("image"),
+                io.Combo.Input(
+                    "operation",
+                    options=["erode", "dilate", "open", "close", "gradient", "bottom_hat", "top_hat"],
+                ),
+                io.Int.Input("kernel_size", default=3, min=3, max=999, step=1),
+            ],
+            outputs=[
+                io.Image.Output(),
+            ],
+        )
+
+    @classmethod
+    def execute(cls, image, operation, kernel_size) -> io.NodeOutput:
         device = comfy.model_management.get_torch_device()
         kernel = torch.ones(kernel_size, kernel_size, device=device)
         image_k = image.to(device).movedim(-1, 1)
@@ -39,49 +49,63 @@ class Morphology:
         else:
             raise ValueError(f"Invalid operation {operation} for morphology. Must be one of 'erode', 'dilate', 'open', 'close', 'gradient', 'tophat', 'bottomhat'")
         img_out = output.to(comfy.model_management.intermediate_device()).movedim(1, -1)
-        return (img_out,)
+        return io.NodeOutput(img_out)
 
 
-class ImageRGBToYUV:
+class ImageRGBToYUV(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": { "image": ("IMAGE",),
-                              }}
-
-    RETURN_TYPES = ("IMAGE", "IMAGE", "IMAGE")
-    RETURN_NAMES = ("Y", "U", "V")
-    FUNCTION = "execute"
-
-    CATEGORY = "image/batch"
-
-    def execute(self, image):
+    def define_schema(cls):
+        return io.Schema(
+            node_id="ImageRGBToYUV",
+            category="image/batch",
+            inputs=[
+                io.Image.Input("image"),
+            ],
+            outputs=[
+                io.Image.Output(display_name="Y"),
+                io.Image.Output(display_name="U"),
+                io.Image.Output(display_name="V"),
+            ],
+        )
+
+    @classmethod
+    def execute(cls, image) -> io.NodeOutput:
         out = kornia.color.rgb_to_ycbcr(image.movedim(-1, 1)).movedim(1, -1)
-        return (out[..., 0:1].expand_as(image), out[..., 1:2].expand_as(image), out[..., 2:3].expand_as(image))
+        return io.NodeOutput(out[..., 0:1].expand_as(image), out[..., 1:2].expand_as(image), out[..., 2:3].expand_as(image))
 
-class ImageYUVToRGB:
+class ImageYUVToRGB(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {"Y": ("IMAGE",),
-                             "U": ("IMAGE",),
-                             "V": ("IMAGE",),
-                             }}
-
-    RETURN_TYPES = ("IMAGE",)
-    FUNCTION = "execute"
-
-    CATEGORY = "image/batch"
-
-    def execute(self, Y, U, V):
+    def define_schema(cls):
+        return io.Schema(
+            node_id="ImageYUVToRGB",
+            category="image/batch",
+            inputs=[
+                io.Image.Input("Y"),
+                io.Image.Input("U"),
+                io.Image.Input("V"),
+            ],
+            outputs=[
+                io.Image.Output(),
+            ],
+        )
+
+    @classmethod
+    def execute(cls, Y, U, V) -> io.NodeOutput:
         image = torch.cat([torch.mean(Y, dim=-1, keepdim=True), torch.mean(U, dim=-1, keepdim=True), torch.mean(V, dim=-1, keepdim=True)], dim=-1)
         out = kornia.color.ycbcr_to_rgb(image.movedim(-1, 1)).movedim(1, -1)
-        return (out,)
+        return io.NodeOutput(out)
 
-NODE_CLASS_MAPPINGS = {
-    "Morphology": Morphology,
-    "ImageRGBToYUV": ImageRGBToYUV,
-    "ImageYUVToRGB": ImageYUVToRGB,
-}
 
-NODE_DISPLAY_NAME_MAPPINGS = {
-    "Morphology": "ImageMorphology",
-}
+class MorphologyExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            Morphology,
+            ImageRGBToYUV,
+            ImageYUVToRGB,
+        ]
+
+
+async def comfy_entrypoint() -> MorphologyExtension:
+    return MorphologyExtension()
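For orientation, the Morphology node's heavy lifting is kornia's morphology operators on B,C,H,W tensors (the movedim calls above convert from ComfyUI's B,H,W,C layout). A standalone sketch of the dilation call it wraps, runnable with just torch and kornia installed:

import torch
from kornia.morphology import dilation

img = torch.rand(1, 3, 64, 64)   # B, C, H, W - the layout the node produces via movedim(-1, 1)
kernel = torch.ones(3, 3)        # structuring element, matching the node's default kernel_size=3
out = dilation(img, kernel)      # erosion/opening/closing/gradient/top_hat/bottom_hat share this signature
print(out.shape)                 # torch.Size([1, 3, 64, 64])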
comfy_extras/nodes_optimalsteps.py
@@ -1,9 +1,12 @@
 # from https://github.com/bebebe666/OptimalSteps
 
 
 import numpy as np
 import torch
 
+from typing_extensions import override
+from comfy_api.latest import ComfyExtension, io
+
 
 def loglinear_interp(t_steps, num_steps):
     """
     Performs log-linear interpolation of a given array of decreasing numbers.
@@ -23,25 +26,28 @@ NOISE_LEVELS = {"FLUX": [0.9968, 0.9886, 0.9819, 0.975, 0.966, 0.9471, 0.9158, 0
 "Chroma": [0.992, 0.99, 0.988, 0.985, 0.982, 0.978, 0.973, 0.968, 0.961, 0.953, 0.943, 0.931, 0.917, 0.9, 0.881, 0.858, 0.832, 0.802, 0.769, 0.731, 0.69, 0.646, 0.599, 0.55, 0.501, 0.451, 0.402, 0.355, 0.311, 0.27, 0.232, 0.199, 0.169, 0.143, 0.12, 0.101, 0.084, 0.07, 0.058, 0.048, 0.001],
 }
 
-class OptimalStepsScheduler:
+class OptimalStepsScheduler(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required":
-                    {"model_type": (["FLUX", "Wan", "Chroma"], ),
-                     "steps": ("INT", {"default": 20, "min": 3, "max": 1000}),
-                     "denoise": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}),
-                    }
-                }
-    RETURN_TYPES = ("SIGMAS",)
-    CATEGORY = "sampling/custom_sampling/schedulers"
-
-    FUNCTION = "get_sigmas"
-
-    def get_sigmas(self, model_type, steps, denoise):
+    def define_schema(cls):
+        return io.Schema(
+            node_id="OptimalStepsScheduler",
+            category="sampling/custom_sampling/schedulers",
+            inputs=[
+                io.Combo.Input("model_type", options=["FLUX", "Wan", "Chroma"]),
+                io.Int.Input("steps", default=20, min=3, max=1000),
+                io.Float.Input("denoise", default=1.0, min=0.0, max=1.0, step=0.01),
+            ],
+            outputs=[
+                io.Sigmas.Output(),
+            ],
+        )
+
+    @classmethod
+    def execute(cls, model_type, steps, denoise) -> io.NodeOutput:
         total_steps = steps
         if denoise < 1.0:
             if denoise <= 0.0:
-                return (torch.FloatTensor([]),)
+                return io.NodeOutput(torch.FloatTensor([]))
             total_steps = round(steps * denoise)
 
         sigmas = NOISE_LEVELS[model_type][:]
@@ -50,8 +56,16 @@ class OptimalStepsScheduler:
 
         sigmas = sigmas[-(total_steps + 1):]
         sigmas[-1] = 0
-        return (torch.FloatTensor(sigmas), )
+        return io.NodeOutput(torch.FloatTensor(sigmas))
 
-NODE_CLASS_MAPPINGS = {
-    "OptimalStepsScheduler": OptimalStepsScheduler,
-}
+
+class OptimalStepsExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            OptimalStepsScheduler,
+        ]
+
+
+async def comfy_entrypoint() -> OptimalStepsExtension:
+    return OptimalStepsExtension()
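The docstring above only names the technique; a minimal sketch of log-linear interpolation over a decreasing schedule, written from that description rather than copied from the repo:

import numpy as np

def loglinear_interp_sketch(t_steps, num_steps):
    # Interpolate a decreasing sequence in log-space, then map back.
    xs = np.linspace(0, 1, len(t_steps))
    ys = np.log(np.asarray(t_steps)[::-1])  # reverse so values increase for np.interp
    new_xs = np.linspace(0, 1, num_steps)
    new_ys = np.interp(new_xs, xs, ys)
    return np.exp(new_ys)[::-1].copy()      # back to decreasing order

print(loglinear_interp_sketch([0.99, 0.5, 0.1, 0.01], 6))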
comfy_extras/nodes_pag.py
@@ -3,25 +3,30 @@
 
 #My modified one here is more basic but has less chances of breaking with ComfyUI updates.
 
+from typing_extensions import override
+
 import comfy.model_patcher
 import comfy.samplers
+from comfy_api.latest import ComfyExtension, io
 
-class PerturbedAttentionGuidance:
+
+class PerturbedAttentionGuidance(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {
-            "required": {
-                "model": ("MODEL",),
-                "scale": ("FLOAT", {"default": 3.0, "min": 0.0, "max": 100.0, "step": 0.01, "round": 0.01}),
-            }
-        }
-
-    RETURN_TYPES = ("MODEL",)
-    FUNCTION = "patch"
-
-    CATEGORY = "model_patches/unet"
-
-    def patch(self, model, scale):
+    def define_schema(cls):
+        return io.Schema(
+            node_id="PerturbedAttentionGuidance",
+            category="model_patches/unet",
+            inputs=[
+                io.Model.Input("model"),
+                io.Float.Input("scale", default=3.0, min=0.0, max=100.0, step=0.01, round=0.01),
+            ],
+            outputs=[
+                io.Model.Output(),
+            ],
+        )
+
+    @classmethod
+    def execute(cls, model, scale) -> io.NodeOutput:
         unet_block = "middle"
         unet_block_id = 0
         m = model.clone()
@@ -49,8 +54,16 @@ class PerturbedAttentionGuidance:
 
         m.set_model_sampler_post_cfg_function(post_cfg_function)
 
-        return (m,)
+        return io.NodeOutput(m)
 
-NODE_CLASS_MAPPINGS = {
-    "PerturbedAttentionGuidance": PerturbedAttentionGuidance,
-}
+
+class PAGExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            PerturbedAttentionGuidance,
+        ]
+
+
+async def comfy_entrypoint() -> PAGExtension:
+    return PAGExtension()
comfy_extras/nodes_stable3d.py
@@ -1,6 +1,8 @@
 import torch
 import nodes
 import comfy.utils
+from typing_extensions import override
+from comfy_api.latest import ComfyExtension, io
 
 def camera_embeddings(elevation, azimuth):
     elevation = torch.as_tensor([elevation])
@@ -20,26 +22,31 @@ def camera_embeddings(elevation, azimuth):
     return embeddings
 
 
-class StableZero123_Conditioning:
+class StableZero123_Conditioning(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": { "clip_vision": ("CLIP_VISION",),
-                              "init_image": ("IMAGE",),
-                              "vae": ("VAE",),
-                              "width": ("INT", {"default": 256, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 8}),
-                              "height": ("INT", {"default": 256, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 8}),
-                              "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
-                              "elevation": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0, "step": 0.1, "round": False}),
-                              "azimuth": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0, "step": 0.1, "round": False}),
-                            }}
-    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
-    RETURN_NAMES = ("positive", "negative", "latent")
-
-    FUNCTION = "encode"
-
-    CATEGORY = "conditioning/3d_models"
-
-    def encode(self, clip_vision, init_image, vae, width, height, batch_size, elevation, azimuth):
+    def define_schema(cls):
+        return io.Schema(
+            node_id="StableZero123_Conditioning",
+            category="conditioning/3d_models",
+            inputs=[
+                io.ClipVision.Input("clip_vision"),
+                io.Image.Input("init_image"),
+                io.Vae.Input("vae"),
+                io.Int.Input("width", default=256, min=16, max=nodes.MAX_RESOLUTION, step=8),
+                io.Int.Input("height", default=256, min=16, max=nodes.MAX_RESOLUTION, step=8),
+                io.Int.Input("batch_size", default=1, min=1, max=4096),
+                io.Float.Input("elevation", default=0.0, min=-180.0, max=180.0, step=0.1, round=False),
+                io.Float.Input("azimuth", default=0.0, min=-180.0, max=180.0, step=0.1, round=False)
+            ],
+            outputs=[
+                io.Conditioning.Output(display_name="positive"),
+                io.Conditioning.Output(display_name="negative"),
+                io.Latent.Output(display_name="latent")
+            ]
+        )
+
+    @classmethod
+    def execute(cls, clip_vision, init_image, vae, width, height, batch_size, elevation, azimuth) -> io.NodeOutput:
         output = clip_vision.encode_image(init_image)
         pooled = output.image_embeds.unsqueeze(0)
         pixels = comfy.utils.common_upscale(init_image.movedim(-1,1), width, height, "bilinear", "center").movedim(1,-1)
@@ -51,30 +58,35 @@ class StableZero123_Conditioning:
         positive = [[cond, {"concat_latent_image": t}]]
         negative = [[torch.zeros_like(pooled), {"concat_latent_image": torch.zeros_like(t)}]]
         latent = torch.zeros([batch_size, 4, height // 8, width // 8])
-        return (positive, negative, {"samples":latent})
+        return io.NodeOutput(positive, negative, {"samples":latent})
 
-class StableZero123_Conditioning_Batched:
+class StableZero123_Conditioning_Batched(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": { "clip_vision": ("CLIP_VISION",),
-                              "init_image": ("IMAGE",),
-                              "vae": ("VAE",),
-                              "width": ("INT", {"default": 256, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 8}),
-                              "height": ("INT", {"default": 256, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 8}),
-                              "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
-                              "elevation": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0, "step": 0.1, "round": False}),
-                              "azimuth": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0, "step": 0.1, "round": False}),
-                              "elevation_batch_increment": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0, "step": 0.1, "round": False}),
-                              "azimuth_batch_increment": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0, "step": 0.1, "round": False}),
-                            }}
-    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
-    RETURN_NAMES = ("positive", "negative", "latent")
+    def define_schema(cls):
+        return io.Schema(
+            node_id="StableZero123_Conditioning_Batched",
+            category="conditioning/3d_models",
+            inputs=[
+                io.ClipVision.Input("clip_vision"),
+                io.Image.Input("init_image"),
+                io.Vae.Input("vae"),
+                io.Int.Input("width", default=256, min=16, max=nodes.MAX_RESOLUTION, step=8),
+                io.Int.Input("height", default=256, min=16, max=nodes.MAX_RESOLUTION, step=8),
+                io.Int.Input("batch_size", default=1, min=1, max=4096),
+                io.Float.Input("elevation", default=0.0, min=-180.0, max=180.0, step=0.1, round=False),
+                io.Float.Input("azimuth", default=0.0, min=-180.0, max=180.0, step=0.1, round=False),
+                io.Float.Input("elevation_batch_increment", default=0.0, min=-180.0, max=180.0, step=0.1, round=False),
+                io.Float.Input("azimuth_batch_increment", default=0.0, min=-180.0, max=180.0, step=0.1, round=False)
+            ],
+            outputs=[
+                io.Conditioning.Output(display_name="positive"),
+                io.Conditioning.Output(display_name="negative"),
+                io.Latent.Output(display_name="latent")
+            ]
+        )
 
-    FUNCTION = "encode"
-
-    CATEGORY = "conditioning/3d_models"
-
-    def encode(self, clip_vision, init_image, vae, width, height, batch_size, elevation, azimuth, elevation_batch_increment, azimuth_batch_increment):
+    @classmethod
+    def execute(cls, clip_vision, init_image, vae, width, height, batch_size, elevation, azimuth, elevation_batch_increment, azimuth_batch_increment) -> io.NodeOutput:
         output = clip_vision.encode_image(init_image)
         pooled = output.image_embeds.unsqueeze(0)
         pixels = comfy.utils.common_upscale(init_image.movedim(-1,1), width, height, "bilinear", "center").movedim(1,-1)
@@ -93,27 +105,32 @@ class StableZero123_Conditioning_Batched:
         positive = [[cond, {"concat_latent_image": t}]]
         negative = [[torch.zeros_like(pooled), {"concat_latent_image": torch.zeros_like(t)}]]
         latent = torch.zeros([batch_size, 4, height // 8, width // 8])
-        return (positive, negative, {"samples":latent, "batch_index": [0] * batch_size})
+        return io.NodeOutput(positive, negative, {"samples":latent, "batch_index": [0] * batch_size})
 
-class SV3D_Conditioning:
+class SV3D_Conditioning(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": { "clip_vision": ("CLIP_VISION",),
-                              "init_image": ("IMAGE",),
-                              "vae": ("VAE",),
-                              "width": ("INT", {"default": 576, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 8}),
-                              "height": ("INT", {"default": 576, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 8}),
-                              "video_frames": ("INT", {"default": 21, "min": 1, "max": 4096}),
-                              "elevation": ("FLOAT", {"default": 0.0, "min": -90.0, "max": 90.0, "step": 0.1, "round": False}),
-                            }}
-    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
-    RETURN_NAMES = ("positive", "negative", "latent")
+    def define_schema(cls):
+        return io.Schema(
+            node_id="SV3D_Conditioning",
+            category="conditioning/3d_models",
+            inputs=[
+                io.ClipVision.Input("clip_vision"),
+                io.Image.Input("init_image"),
+                io.Vae.Input("vae"),
+                io.Int.Input("width", default=576, min=16, max=nodes.MAX_RESOLUTION, step=8),
+                io.Int.Input("height", default=576, min=16, max=nodes.MAX_RESOLUTION, step=8),
+                io.Int.Input("video_frames", default=21, min=1, max=4096),
+                io.Float.Input("elevation", default=0.0, min=-90.0, max=90.0, step=0.1, round=False)
+            ],
+            outputs=[
+                io.Conditioning.Output(display_name="positive"),
+                io.Conditioning.Output(display_name="negative"),
+                io.Latent.Output(display_name="latent")
+            ]
+        )
 
-    FUNCTION = "encode"
-
-    CATEGORY = "conditioning/3d_models"
-
-    def encode(self, clip_vision, init_image, vae, width, height, video_frames, elevation):
+    @classmethod
+    def execute(cls, clip_vision, init_image, vae, width, height, video_frames, elevation) -> io.NodeOutput:
         output = clip_vision.encode_image(init_image)
         pooled = output.image_embeds.unsqueeze(0)
         pixels = comfy.utils.common_upscale(init_image.movedim(-1,1), width, height, "bilinear", "center").movedim(1,-1)
@@ -133,11 +150,17 @@ class SV3D_Conditioning:
         positive = [[pooled, {"concat_latent_image": t, "elevation": elevations, "azimuth": azimuths}]]
         negative = [[torch.zeros_like(pooled), {"concat_latent_image": torch.zeros_like(t), "elevation": elevations, "azimuth": azimuths}]]
         latent = torch.zeros([video_frames, 4, height // 8, width // 8])
-        return (positive, negative, {"samples":latent})
+        return io.NodeOutput(positive, negative, {"samples":latent})
 
 
-NODE_CLASS_MAPPINGS = {
-    "StableZero123_Conditioning": StableZero123_Conditioning,
-    "StableZero123_Conditioning_Batched": StableZero123_Conditioning_Batched,
-    "SV3D_Conditioning": SV3D_Conditioning,
-}
+class Stable3DExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            StableZero123_Conditioning,
+            StableZero123_Conditioning_Batched,
+            SV3D_Conditioning,
+        ]
+
+
+async def comfy_entrypoint() -> Stable3DExtension:
+    return Stable3DExtension()
comfy_extras/nodes_tomesd.py
@@ -1,7 +1,9 @@
 #Taken from: https://github.com/dbolya/tomesd
 
 import torch
-from typing import Tuple, Callable
+from typing import Tuple, Callable, Optional
+from typing_extensions import override
+from comfy_api.latest import ComfyExtension, io
 import math
 
 def do_nothing(x: torch.Tensor, mode:str=None):
@@ -144,33 +146,45 @@ def get_functions(x, ratio, original_shape):
 
 
 
-class TomePatchModel:
+class TomePatchModel(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": { "model": ("MODEL",),
-                              "ratio": ("FLOAT", {"default": 0.3, "min": 0.0, "max": 1.0, "step": 0.01}),
-                            }}
-    RETURN_TYPES = ("MODEL",)
-    FUNCTION = "patch"
-
-    CATEGORY = "model_patches/unet"
-
-    def patch(self, model, ratio):
-        self.u = None
+    def define_schema(cls):
+        return io.Schema(
+            node_id="TomePatchModel",
+            category="model_patches/unet",
+            inputs=[
+                io.Model.Input("model"),
+                io.Float.Input("ratio", default=0.3, min=0.0, max=1.0, step=0.01),
+            ],
+            outputs=[io.Model.Output()],
+        )
+
+    @classmethod
+    def execute(cls, model, ratio) -> io.NodeOutput:
+        u: Optional[Callable] = None
+
         def tomesd_m(q, k, v, extra_options):
+            nonlocal u
             #NOTE: In the reference code get_functions takes x (input of the transformer block) as the argument instead of q
             #however from my basic testing it seems that using q instead gives better results
-            m, self.u = get_functions(q, ratio, extra_options["original_shape"])
+            m, u = get_functions(q, ratio, extra_options["original_shape"])
             return m(q), k, v
         def tomesd_u(n, extra_options):
-            return self.u(n)
+            nonlocal u
+            return u(n)
 
         m = model.clone()
         m.set_model_attn1_patch(tomesd_m)
         m.set_model_attn1_output_patch(tomesd_u)
-        return (m, )
+        return io.NodeOutput(m)
 
 
-NODE_CLASS_MAPPINGS = {
-    "TomePatchModel": TomePatchModel,
-}
+class TomePatchModelExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            TomePatchModel,
+        ]
+
+
+async def comfy_entrypoint() -> TomePatchModelExtension:
+    return TomePatchModelExtension()
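The one non-mechanical change in this file: execute is now a classmethod, so the merge/unmerge pair can no longer stash state on self, and it is threaded through a nonlocal closure variable instead. The same pattern in isolation, as a runnable pure-Python sketch:

from typing import Callable, Optional

def make_patch_pair():
    u: Optional[Callable] = None

    def merge(x):
        nonlocal u              # write the unmerge fn captured here...
        u = lambda y: y + 1     # stand-in for the real unmerge function
        return x * 2

    def unmerge(y):
        nonlocal u              # ...and read it back in the sibling closure
        return u(y)

    return merge, unmerge

merge, unmerge = make_patch_pair()
print(unmerge(merge(3)))        # 7: merge runs first and installs the unmerge fn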
comfy_extras/nodes_torch_compile.py
@@ -1,23 +1,39 @@
+from typing_extensions import override
+from comfy_api.latest import ComfyExtension, io
 from comfy_api.torch_helpers import set_torch_compile_wrapper
 
 
-class TorchCompileModel:
+class TorchCompileModel(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": { "model": ("MODEL",),
-                              "backend": (["inductor", "cudagraphs"],),
-                            }}
-    RETURN_TYPES = ("MODEL",)
-    FUNCTION = "patch"
-
-    CATEGORY = "_for_testing"
-    EXPERIMENTAL = True
-
-    def patch(self, model, backend):
+    def define_schema(cls) -> io.Schema:
+        return io.Schema(
+            node_id="TorchCompileModel",
+            category="_for_testing",
+            inputs=[
+                io.Model.Input("model"),
+                io.Combo.Input(
+                    "backend",
+                    options=["inductor", "cudagraphs"],
+                ),
+            ],
+            outputs=[io.Model.Output()],
+            is_experimental=True,
+        )
+
+    @classmethod
+    def execute(cls, model, backend) -> io.NodeOutput:
         m = model.clone()
         set_torch_compile_wrapper(model=m, backend=backend)
-        return (m, )
+        return io.NodeOutput(m)
 
-NODE_CLASS_MAPPINGS = {
-    "TorchCompileModel": TorchCompileModel,
-}
+
+class TorchCompileExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            TorchCompileModel,
+        ]
+
+
+async def comfy_entrypoint() -> TorchCompileExtension:
+    return TorchCompileExtension()
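set_torch_compile_wrapper is ComfyUI's own helper, but the two backends the node exposes are plain torch.compile backends; the underlying PyTorch call, shown for orientation (standard API, nothing ComfyUI-specific):

import torch

net = torch.nn.Linear(8, 8)
compiled = torch.compile(net, backend="inductor")  # "cudagraphs" is the node's other option
print(compiled(torch.rand(2, 8)).shape)            # torch.Size([2, 8])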
comfyui_version.py
@@ -1,3 +1,3 @@
 # This file is automatically generated by the build process when version is
 # updated in pyproject.toml.
-__version__ = "0.3.62"
+__version__ = "0.3.63"
custom_nodes/example_node.py.example
@@ -1,96 +1,70 @@
-class Example:
+from typing_extensions import override
+
+from comfy_api.latest import ComfyExtension, io
+
+
+class Example(io.ComfyNode):
     """
-    A example node
+    An example node
 
     Class methods
     -------------
-    INPUT_TYPES (dict):
-        Tell the main program input parameters of nodes.
-    IS_CHANGED:
+    define_schema (io.Schema):
+        Tell the main program the metadata, input, output parameters of nodes.
+    fingerprint_inputs:
         optional method to control when the node is re executed.
-
-    Attributes
-    ----------
-    RETURN_TYPES (`tuple`):
-        The type of each element in the output tuple.
-    RETURN_NAMES (`tuple`):
-        Optional: The name of each output in the output tuple.
-    FUNCTION (`str`):
-        The name of the entry-point method. For example, if `FUNCTION = "execute"` then it will run Example().execute()
-    OUTPUT_NODE ([`bool`]):
-        If this node is an output node that outputs a result/image from the graph. The SaveImage node is an example.
-        The backend iterates on these output nodes and tries to execute all their parents if their parent graph is properly connected.
-        Assumed to be False if not present.
-    CATEGORY (`str`):
-        The category the node should appear in the UI.
-    DEPRECATED (`bool`):
-        Indicates whether the node is deprecated. Deprecated nodes are hidden by default in the UI, but remain
-        functional in existing workflows that use them.
-    EXPERIMENTAL (`bool`):
-        Indicates whether the node is experimental. Experimental nodes are marked as such in the UI and may be subject to
-        significant changes or removal in future versions. Use with caution in production workflows.
-    execute(s) -> tuple || None:
-        The entry point method. The name of this method must be the same as the value of property `FUNCTION`.
-        For example, if `FUNCTION = "execute"` then this method's name must be `execute`, if `FUNCTION = "foo"` then it must be `foo`.
+    check_lazy_status:
+        optional method to control list of input names that need to be evaluated.
     """
-    def __init__(self):
-        pass
 
     @classmethod
-    def INPUT_TYPES(s):
+    def define_schema(cls) -> io.Schema:
         """
-        Return a dictionary which contains config for all input fields.
-        Some types (string): "MODEL", "VAE", "CLIP", "CONDITIONING", "LATENT", "IMAGE", "INT", "STRING", "FLOAT".
-        Input types "INT", "STRING" or "FLOAT" are special values for fields on the node.
-        The type can be a list for selection.
-
-        Returns: `dict`:
-            - Key input_fields_group (`string`): Can be either required, hidden or optional. A node class must have property `required`
-            - Value input_fields (`dict`): Contains input fields config:
-                * Key field_name (`string`): Name of a entry-point method's argument
-                * Value field_config (`tuple`):
-                    + First value is a string indicate the type of field or a list for selection.
-                    + Second value is a config for type "INT", "STRING" or "FLOAT".
+        Return a schema which contains all information about the node.
+        Some types: "Model", "Vae", "Clip", "Conditioning", "Latent", "Image", "Int", "String", "Float", "Combo".
+        For outputs the "io.Model.Output" should be used, for inputs the "io.Model.Input" can be used.
+        The type can be a "Combo" - this will be a list for selection.
         """
-        return {
-            "required": {
-                "image": ("IMAGE",),
-                "int_field": ("INT", {
-                    "default": 0,
-                    "min": 0, #Minimum value
-                    "max": 4096, #Maximum value
-                    "step": 64, #Slider's step
-                    "display": "number", # Cosmetic only: display as "number" or "slider"
-                    "lazy": True # Will only be evaluated if check_lazy_status requires it
-                }),
-                "float_field": ("FLOAT", {
-                    "default": 1.0,
-                    "min": 0.0,
-                    "max": 10.0,
-                    "step": 0.01,
-                    "round": 0.001, #The value representing the precision to round to, will be set to the step value by default. Can be set to False to disable rounding.
-                    "display": "number",
-                    "lazy": True
-                }),
-                "print_to_screen": (["enable", "disable"],),
-                "string_field": ("STRING", {
-                    "multiline": False, #True if you want the field to look like the one on the ClipTextEncode node
-                    "default": "Hello World!",
-                    "lazy": True
-                }),
-            },
-        }
+        return io.Schema(
+            node_id="Example",
+            display_name="Example Node",
+            category="Example",
+            inputs=[
+                io.Image.Input("image"),
+                io.Int.Input(
+                    "int_field",
+                    min=0,
+                    max=4096,
+                    step=64, # Slider's step
+                    display_mode=io.NumberDisplay.number, # Cosmetic only: display as "number" or "slider"
+                    lazy=True, # Will only be evaluated if check_lazy_status requires it
+                ),
+                io.Float.Input(
+                    "float_field",
+                    default=1.0,
+                    min=0.0,
+                    max=10.0,
+                    step=0.01,
+                    round=0.001, #The value representing the precision to round to, will be set to the step value by default. Can be set to False to disable rounding.
+                    display_mode=io.NumberDisplay.number,
+                    lazy=True,
+                ),
+                io.Combo.Input("print_to_screen", options=["enable", "disable"]),
+                io.String.Input(
+                    "string_field",
+                    multiline=False, # True if you want the field to look like the one on the ClipTextEncode node
+                    default="Hello world!",
+                    lazy=True,
+                ),
+            ],
+            outputs=[
+                io.Image.Output(),
+            ],
+        )
 
-    RETURN_TYPES = ("IMAGE",)
-    #RETURN_NAMES = ("image_output_name",)
-
-    FUNCTION = "test"
-
-    #OUTPUT_NODE = False
-
-    CATEGORY = "Example"
-
-    def check_lazy_status(self, image, string_field, int_field, float_field, print_to_screen):
+    @classmethod
+    def check_lazy_status(cls, image, string_field, int_field, float_field, print_to_screen):
         """
         Return a list of input names that need to be evaluated.
 
@@ -107,7 +81,8 @@ class Example:
         else:
             return []
 
-    def test(self, image, string_field, int_field, float_field, print_to_screen):
+    @classmethod
+    def execute(cls, image, string_field, int_field, float_field, print_to_screen) -> io.NodeOutput:
         if print_to_screen == "enable":
             print(f"""Your input contains:
                 string_field aka input text: {string_field}
@@ -116,7 +91,7 @@ class Example:
             """)
         #do some processing on the image, in this example I just invert it
         image = 1.0 - image
-        return (image,)
+        return io.NodeOutput(image)
 
     """
         The node will always be re executed if any of the inputs change but
@@ -127,7 +102,7 @@ class Example:
         changes between executions the LoadImage node is executed again.
     """
     #@classmethod
-    #def IS_CHANGED(s, image, string_field, int_field, float_field, print_to_screen):
+    #def fingerprint_inputs(s, image, string_field, int_field, float_field, print_to_screen):
    #    return ""
 
 # Set the web directory, any .js file in that directory will be loaded by the frontend as a frontend extension
@@ -143,13 +118,13 @@ async def get_hello(request):
     return web.json_response("hello")
 
 
-# A dictionary that contains all nodes you want to export with their names
-# NOTE: names should be globally unique
-NODE_CLASS_MAPPINGS = {
-    "Example": Example
-}
+class ExampleExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            Example,
+        ]
 
-# A dictionary that contains the friendly/humanly readable titles for the nodes
-NODE_DISPLAY_NAME_MAPPINGS = {
-    "Example": "Example Node"
-}
+
+async def comfy_entrypoint() -> ExampleExtension:  # ComfyUI calls this to load your extension and its nodes.
+    return ExampleExtension()
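check_lazy_status is the one piece of the example whose control flow is easy to miss: inputs declared lazy=True arrive as None until the method requests them by name, and the method is called again after each batch of requests. A pure-Python sketch of that loop with a stand-in node and executor (hypothetical, not ComfyUI's real scheduler):

class FakeNode:
    @classmethod
    def check_lazy_status(cls, image=None, text=None):
        # Ask for every lazy input that has not been evaluated yet.
        return [name for name, value in {"image": image, "text": text}.items() if value is None]

    @classmethod
    def execute(cls, image, text):
        return (image, text)


available = {"image": None, "text": None}
needed = FakeNode.check_lazy_status(**available)
while needed:
    for name in needed:
        available[name] = f"<evaluated {name}>"  # stand-in for running the upstream node
    needed = FakeNode.check_lazy_status(**available)
print(FakeNode.execute(**available))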
main.py
@@ -115,6 +115,7 @@ if os.name == "nt":
     os.environ['MIMALLOC_PURGE_DELAY'] = '0'
 
 if __name__ == "__main__":
+    os.environ['TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL'] = '1'
     if args.default_device is not None:
         default_dev = args.default_device
         devices = list(range(32))
nodes.py
@@ -2297,6 +2297,7 @@ async def init_builtin_extra_nodes():
         "nodes_gits.py",
         "nodes_controlnet.py",
         "nodes_hunyuan.py",
+        "nodes_eps.py",
         "nodes_flux.py",
         "nodes_lora_extract.py",
         "nodes_torch_compile.py",
pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "ComfyUI"
-version = "0.3.62"
+version = "0.3.63"
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.9"
@@ -22,3 +22,49 @@ lint.select = [
     "F",
 ]
 exclude = ["*.ipynb", "**/generated/*.pyi"]
+
+[tool.pylint]
+master.py-version = "3.9"
+master.extension-pkg-allow-list = [
+    "pydantic",
+]
+reports.output-format = "colorized"
+similarities.ignore-imports = "yes"
+messages_control.disable = [
+    "missing-module-docstring",
+    "missing-class-docstring",
+    "missing-function-docstring",
+    "line-too-long",
+    "too-few-public-methods",
+    "too-many-public-methods",
+    "too-many-instance-attributes",
+    "too-many-positional-arguments",
+    "broad-exception-raised",
+    "too-many-lines",
+    "invalid-name",
+    "unused-argument",
+    "broad-exception-caught",
+    "consider-using-with",
+    "fixme",
+    "too-many-statements",
+    "too-many-branches",
+    "too-many-locals",
+    "too-many-arguments",
+    "duplicate-code",
+    "abstract-method",
+    "superfluous-parens",
+    "arguments-differ",
+    "redefined-builtin",
+    "unnecessary-lambda",
+    "dangerous-default-value",
+    "invalid-overridden-method",
+    # next warnings should be fixed in future
+    "bad-classmethod-argument",  # Class method should have 'cls' as first argument
+    "wrong-import-order",  # Standard imports should be placed before third party imports
+    "logging-fstring-interpolation",  # Use lazy % formatting in logging functions
+    "ungrouped-imports",
+    "unnecessary-pass",
+    "unnecessary-lambda-assignment",
+    "no-else-return",
+    "unused-variable",
+]
requirements.txt
@@ -1,5 +1,5 @@
-comfyui-frontend-package==1.26.13
-comfyui-workflow-templates==0.1.91
+comfyui-frontend-package==1.27.7
+comfyui-workflow-templates==0.1.93
 comfyui-embedded-docs==0.2.6
 torch
 torchsde
@@ -25,6 +25,5 @@ av>=14.2.0
 #non essential dependencies:
 kornia>=0.7.1
 spandrel
-soundfile
 pydantic~=2.0
 pydantic-settings~=2.0