Mirror of https://github.com/comfyanonymous/ComfyUI.git (synced 2026-01-29 15:50:22 +08:00)

Compare commits: 48 commits, 72f725c13b ... 5f6232b6e0
Commits (SHA1): 5f6232b6e0, c6238047ee, 5cd1113236, fffb96ad42, a506be2486, dbf8f9dcf9, 72ca18acc2, f588e6c821, 0da072e098, 31d358c78c, 4dd42ef1b7, 02529c6d57, 49febe15c3, 84fa155071, 4691717340, fadc7839cc, 3039c7ba14, 9b573da39b, 4d7012ecda, 21bc67d7db, 7b2e5ef0af, 1afc2ed8e6, d41b1111eb, 5b0c80a093, e30298dda2, 98b6bfcb71, fc5fabb629, 5db5da790f, a4e9d071e8, 4fe772fae9, 0d2044a778, 7e62f8cc9f, 74621b9d86, db74a27870, acb9a11c6f, d9f71da998, 183b377588, ebd945ce3d, 58e7cea796, 768c9cedf8, d629c8f910, 413ee3f687, d12702ee0b, f030b3afc8, 44a5bf353a, 4b9332cc21, 041dbd6a8a, 08d93555d0
@@ -183,7 +183,7 @@ Simply download, extract with [7-Zip](https://7-zip.org) or with the windows exp
 If you have trouble extracting it, right click the file -> properties -> unblock
 
-Update your Nvidia drivers if it doesn't start.
+The portable above currently comes with python 3.13 and pytorch cuda 13.0. Update your Nvidia drivers if it doesn't start.
 
 #### Alternative Downloads:

@@ -212,7 +212,7 @@ Python 3.14 works but you may encounter issues with the torch compile node. The
 Python 3.13 is very well supported. If you have trouble with some custom node dependencies on 3.13 you can try 3.12
 
-torch 2.4 and above is supported but some features might only work on newer versions. We generally recommend using the latest major version of pytorch unless it is less than 2 weeks old.
+torch 2.4 and above is supported but some features might only work on newer versions. We generally recommend using the latest major version of pytorch with the latest cuda version unless it is less than 2 weeks old.
 
 ### Instructions:
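The version guidance above is easy to verify locally. A minimal sketch (not part of this commit) that prints the installed torch build, the CUDA version it was compiled against, and whether a GPU is visible:

```python
import torch

# Show the torch build and the CUDA runtime it was compiled against.
print("torch:", torch.__version__)
print("cuda build:", torch.version.cuda)        # None on CPU-only wheels
print("cuda available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("device:", torch.cuda.get_device_name(0))
```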
@@ -747,6 +747,10 @@ class ACEAudio(LatentFormat):
     latent_channels = 8
     latent_dimensions = 2
 
+class SeedVR2(LatentFormat):
+    latent_channels = 16
+    latent_dimensions = 16
+
 class ChromaRadiance(LatentFormat):
     latent_channels = 3
@@ -19,9 +19,15 @@ if model_management.xformers_enabled():
     import xformers.ops
 
 SAGE_ATTENTION_IS_AVAILABLE = False
+SAGE_ATTENTION_VAR_LENGTH_AVAILABLE = False
 try:
     from sageattention import sageattn
     SAGE_ATTENTION_IS_AVAILABLE = True
+    try:
+        from sageattention import sageattn_varlen
+        SAGE_ATTENTION_VAR_LENGTH_AVAILABLE = True
+    except:
+        pass
 except ImportError as e:
     if model_management.sage_attention_enabled():
         if e.name == "sageattention":
@@ -39,7 +45,7 @@ except ImportError:
 
 FLASH_ATTENTION_IS_AVAILABLE = False
 try:
-    from flash_attn import flash_attn_func
+    from flash_attn import flash_attn_func, flash_attn_varlen_func
     FLASH_ATTENTION_IS_AVAILABLE = True
 except ImportError:
     if model_management.flash_attention_enabled():
@@ -87,7 +93,13 @@ def default(val, d):
         return val
     return d
 
+
+def var_attn_arg(kwargs):
+    cu_seqlens_q = kwargs.get("cu_seqlens_q", None)
+    cu_seqlens_k = kwargs.get("cu_seqlens_k", cu_seqlens_q)
+    max_seqlen_q = kwargs.get("max_seqlen_q", None)
+    max_seqlen_k = kwargs.get("max_seqlen_k", max_seqlen_q)
+    assert cu_seqlens_q is not None, "cu_seqlens_q shouldn't be None when var_length is True"
+    return cu_seqlens_q, cu_seqlens_k, max_seqlen_q, max_seqlen_k
 # feedforward
 class GEGLU(nn.Module):
     def __init__(self, dim_in, dim_out, dtype=None, device=None, operations=ops):
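For context on var_attn_arg above: `cu_seqlens_q`/`cu_seqlens_k` are cumulative sequence lengths for sequences packed back to back along one token dimension, and `max_seqlen_*` is the longest individual sequence. An illustrative sketch (toy values, not code from this commit) of how a caller might build those kwargs:

```python
import torch

# Three sequences of 3, 5 and 2 tokens packed into one [10, ...] tensor.
seq_lens = torch.tensor([3, 5, 2])
cu_seqlens = torch.nn.functional.pad(torch.cumsum(seq_lens, dim=0), (1, 0)).to(torch.int32)
# cu_seqlens == tensor([0, 3, 8, 10]): the boundaries of each sequence.
max_seqlen = int(seq_lens.max())  # 5

kwargs = {
    "var_length": True,
    "cu_seqlens_q": cu_seqlens,
    "max_seqlen_q": max_seqlen,
}
```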
@@ -412,13 +424,14 @@ except:
 
 @wrap_attn
 def attention_xformers(q, k, v, heads, mask=None, attn_precision=None, skip_reshape=False, skip_output_reshape=False, **kwargs):
+    var_length = kwargs.get("var_length", False)
     b = q.shape[0]
     dim_head = q.shape[-1]
     # check to make sure xformers isn't broken
     disabled_xformers = False
 
     if BROKEN_XFORMERS:
-        if b * heads > 65535:
+        if b * heads > 65535 and not var_length:
             disabled_xformers = True
 
     if not disabled_xformers:
@@ -426,9 +439,27 @@ def attention_xformers(q, k, v, heads, mask=None, attn_precision=None, skip_resh
             disabled_xformers = True
 
     if disabled_xformers:
-        return attention_pytorch(q, k, v, heads, mask, skip_reshape=skip_reshape, **kwargs)
+        return attention_pytorch(q, k, v, heads, mask, skip_reshape=skip_reshape, var_length=var_length, **kwargs)
 
-    if skip_reshape:
+    if var_length:
+        if not skip_reshape:
+            total_tokens, hidden_dim = q.shape
+            dim_head = hidden_dim // heads
+            q = q.view(1, total_tokens, heads, dim_head)
+            k = k.view(1, total_tokens, heads, dim_head)
+            v = v.view(1, total_tokens, heads, dim_head)
+        else:
+            if q.ndim == 3:
+                q = q.unsqueeze(0)
+            if k.ndim == 3:
+                k = k.unsqueeze(0)
+            if v.ndim == 3:
+                v = v.unsqueeze(0)
+            dim_head = q.shape[-1]
+
+        target_output_shape = (q.shape[1], -1)
+        b = 1
+    elif skip_reshape:
         # b h k d -> b k h d
         q, k, v = map(
             lambda t: t.permute(0, 2, 1, 3),
@@ -442,7 +473,11 @@ def attention_xformers(q, k, v, heads, mask=None, attn_precision=None, skip_resh
             (q, k, v),
         )
 
-    if mask is not None:
+    if var_length:
+        cu_seqlens_q, _, _, _ = var_attn_arg(kwargs)
+        seq_lens = cu_seqlens_q[1:] - cu_seqlens_q[:-1]
+        mask = xformers.ops.BlockDiagonalMask.from_seqlens(seq_lens_q=seq_lens, seq_lens_k=seq_lens)
+    elif mask is not None:
         # add a singleton batch dimension
         if mask.ndim == 2:
             mask = mask.unsqueeze(0)
@@ -464,6 +499,8 @@ def attention_xformers(q, k, v, heads, mask=None, attn_precision=None, skip_resh
 
     out = xformers.ops.memory_efficient_attention(q, k, v, attn_bias=mask)
 
+    if var_length:
+        return out.reshape(*target_output_shape)
     if skip_output_reshape:
         out = out.permute(0, 2, 1, 3)
     else:
@@ -481,7 +518,28 @@ else:
 
 @wrap_attn
 def attention_pytorch(q, k, v, heads, mask=None, attn_precision=None, skip_reshape=False, skip_output_reshape=False, **kwargs):
-    if skip_reshape:
+    var_length = kwargs.get("var_length", False)
+    if var_length:
+        cu_seqlens_q, cu_seqlens_k, _, _ = var_attn_arg(kwargs)
+        if not skip_reshape:
+            # assumes 2D q, k, v [total_tokens, embed_dim]
+            total_tokens, embed_dim = q.shape
+            head_dim = embed_dim // heads
+            q = q.view(total_tokens, heads, head_dim)
+            k = k.view(k.shape[0], heads, head_dim)
+            v = v.view(v.shape[0], heads, head_dim)
+
+        b = q.size(0)
+        dim_head = q.shape[-1]
+        q = torch.nested.nested_tensor_from_jagged(q, offsets=cu_seqlens_q.long())
+        k = torch.nested.nested_tensor_from_jagged(k, offsets=cu_seqlens_k.long())
+        v = torch.nested.nested_tensor_from_jagged(v, offsets=cu_seqlens_k.long())
+
+        mask = None
+        q = q.transpose(1, 2)
+        k = k.transpose(1, 2)
+        v = v.transpose(1, 2)
+    elif skip_reshape:
         b, _, _, dim_head = q.shape
     else:
         b, _, dim_head = q.shape
@@ -499,8 +557,10 @@ def attention_pytorch(q, k, v, heads, mask=None, attn_precision=None, skip_resha
         if mask.ndim == 3:
             mask = mask.unsqueeze(1)
 
-    if SDP_BATCH_LIMIT >= b:
+    if SDP_BATCH_LIMIT >= b or var_length:
         out = comfy.ops.scaled_dot_product_attention(q, k, v, attn_mask=mask, dropout_p=0.0, is_causal=False)
+        if var_length:
+            return out.contiguous().transpose(1, 2).values()
         if not skip_output_reshape:
             out = (
                 out.transpose(1, 2).reshape(b, -1, heads * dim_head)
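The var_length branch of attention_pytorch above packs the jagged batch into a nested (jagged) tensor so scaled_dot_product_attention attends within each sequence without padding. A standalone sketch of that idea (toy shapes; assumes a recent PyTorch with nested-tensor SDPA support, not code from this commit):

```python
import torch
import torch.nn.functional as F

heads, head_dim = 4, 8
seq_lens = torch.tensor([3, 5, 2])
total = int(seq_lens.sum())
cu_seqlens = F.pad(torch.cumsum(seq_lens, dim=0), (1, 0))  # [0, 3, 8, 10]

# Packed projections: [total_tokens, heads, head_dim], as in the code above.
q, k, v = (torch.randn(total, heads, head_dim) for _ in range(3))

# Wrap as jagged nested tensors and move heads in front of the jagged dim.
nq, nk, nv = (
    torch.nested.nested_tensor_from_jagged(t, offsets=cu_seqlens.long()).transpose(1, 2)
    for t in (q, k, v)
)

# Attention is computed per sequence; tokens never attend across boundaries.
out = F.scaled_dot_product_attention(nq, nk, nv)
# attention_pytorch then unpacks this back to [total_tokens, heads, head_dim]
# via .contiguous().transpose(1, 2).values().
```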
@@ -524,8 +584,19 @@ def attention_pytorch(q, k, v, heads, mask=None, attn_precision=None, skip_resha
 
 @wrap_attn
 def attention_sage(q, k, v, heads, mask=None, attn_precision=None, skip_reshape=False, skip_output_reshape=False, **kwargs):
+    var_length = kwargs.get("var_length", False)
     exception_fallback = False
-    if skip_reshape:
+    if var_length:
+        if not skip_reshape:
+            total_tokens, hidden_dim = q.shape
+            dim_head = hidden_dim // heads
+            q, k, v = [t.view(total_tokens, heads, dim_head) for t in (q, k, v)]
+        b, _, dim_head = q.shape
+        # skips batched code
+        mask = None
+        tensor_layout = "VAR"
+        target_output_shape = (q.shape[0], -1)
+    elif skip_reshape:
         b, _, _, dim_head = q.shape
         tensor_layout = "HND"
     else:
@@ -546,7 +617,14 @@ def attention_sage(q, k, v, heads, mask=None, attn_precision=None, skip_reshape=
             mask = mask.unsqueeze(1)
 
     try:
-        out = sageattn(q, k, v, attn_mask=mask, is_causal=False, tensor_layout=tensor_layout)
+        if var_length and not SAGE_ATTENTION_VAR_LENGTH_AVAILABLE:
+            raise ValueError("Sage Attention two is required to run variable length attention.")
+        elif var_length:
+            cu_seqlens_q, cu_seqlens_k, max_seqlen_q, max_seqlen_k = var_attn_arg(kwargs)
+            sm_scale = 1.0 / (q.shape[-1] ** 0.5)
+            out = sageattn_varlen(q, k, v, cu_seqlens_q, cu_seqlens_k, max_seqlen_q, max_seqlen_k, is_causal=False, sm_scale=sm_scale)
+        else:
+            out = sageattn(q, k, v, attn_mask=mask, is_causal=False, tensor_layout=tensor_layout)
     except Exception as e:
         logging.error("Error running sage attention: {}, using pytorch attention instead.".format(e))
         exception_fallback = True
@@ -556,7 +634,7 @@ def attention_sage(q, k, v, heads, mask=None, attn_precision=None, skip_reshape=
             lambda t: t.transpose(1, 2),
             (q, k, v),
         )
-        return attention_pytorch(q, k, v, heads, mask=mask, skip_reshape=True, skip_output_reshape=skip_output_reshape, **kwargs)
+        return attention_pytorch(q, k, v, heads, mask=mask, skip_reshape=True, skip_output_reshape=skip_output_reshape, var_length=var_length, **kwargs)
 
     if tensor_layout == "HND":
         if not skip_output_reshape:
@@ -567,6 +645,8 @@ def attention_sage(q, k, v, heads, mask=None, attn_precision=None, skip_reshape=
         if skip_output_reshape:
             out = out.transpose(1, 2)
         else:
+            if var_length:
+                return out.view(*target_output_shape)
             out = out.reshape(b, -1, heads * dim_head)
     return out
@@ -678,6 +758,15 @@ except AttributeError as error:
 
 @wrap_attn
 def attention_flash(q, k, v, heads, mask=None, attn_precision=None, skip_reshape=False, skip_output_reshape=False, **kwargs):
+    var_length = kwargs.get("var_length", False)
+    if var_length:
+        cu_seqlens_q, cu_seqlens_k, max_seqlen_q, max_seqlen_k = var_attn_arg(kwargs)
+        return flash_attn_varlen_func(
+            q=q, k=k, v=v,
+            cu_seqlens_q=cu_seqlens_q, cu_seqlens_k=cu_seqlens_k,
+            max_seqlen_q=max_seqlen_q, max_seqlen_k=max_seqlen_k,
+            dropout_p=0.0, softmax_scale=None, causal=False
+        )
     if skip_reshape:
         b, _, _, dim_head = q.shape
     else:
@@ -13,13 +13,14 @@ if model_management.xformers_enabled_vae():
     import xformers
     import xformers.ops
 
 
 def torch_cat_if_needed(xl, dim):
     if len(xl) > 1:
         return torch.cat(xl, dim)
     else:
         return xl[0]
 
-def get_timestep_embedding(timesteps, embedding_dim):
+def get_timestep_embedding(timesteps, embedding_dim, flip_sin_to_cos = False, downscale_freq_shift = 1):
     """
     This matches the implementation in Denoising Diffusion Probabilistic Models:
     From Fairseq.

@@ -30,11 +31,13 @@ def get_timestep_embedding(timesteps, embedding_dim):
     assert len(timesteps.shape) == 1
 
     half_dim = embedding_dim // 2
-    emb = math.log(10000) / (half_dim - 1)
+    emb = math.log(10000) / (half_dim - downscale_freq_shift)
     emb = torch.exp(torch.arange(half_dim, dtype=torch.float32) * -emb)
     emb = emb.to(device=timesteps.device)
     emb = timesteps.float()[:, None] * emb[None, :]
     emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1)
+    if flip_sin_to_cos:
+        emb = torch.cat([emb[:, half_dim:], emb[:, :half_dim]], dim=-1)
     if embedding_dim % 2 == 1: # zero pad
         emb = torch.nn.functional.pad(emb, (0,1,0,0))
     return emb
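For reference, the math of get_timestep_embedding with the new arguments, as a self-contained sketch (same formula as above, no ComfyUI imports; the flip simply swaps the sin and cos halves to match diffusers-style ordering):

```python
import math
import torch

def sinusoidal_embedding(timesteps, dim, flip_sin_to_cos=False, downscale_freq_shift=1):
    half = dim // 2
    # Geometric frequency ladder over half the embedding width.
    freqs = torch.exp(torch.arange(half, dtype=torch.float32)
                      * -(math.log(10000) / (half - downscale_freq_shift)))
    args = timesteps.float()[:, None] * freqs[None, :]
    emb = torch.cat([torch.sin(args), torch.cos(args)], dim=1)
    if flip_sin_to_cos:  # [cos | sin] instead of [sin | cos]
        emb = torch.cat([emb[:, half:], emb[:, :half]], dim=-1)
    if dim % 2 == 1:     # zero pad odd widths
        emb = torch.nn.functional.pad(emb, (0, 1, 0, 0))
    return emb

emb = sinusoidal_embedding(torch.tensor([0, 10, 999]), 128)  # shape [3, 128]
```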
comfy/ldm/seedvr/model.py (new file, 1431 lines): diff suppressed because it is too large.
comfy/ldm/seedvr/vae.py (new file, 1936 lines): diff suppressed because it is too large.
@@ -47,6 +47,8 @@ import comfy.ldm.chroma.model
 import comfy.ldm.chroma_radiance.model
 import comfy.ldm.ace.model
 import comfy.ldm.omnigen.omnigen2
+import comfy.ldm.seedvr.model
 
 import comfy.ldm.qwen_image.model
 import comfy.ldm.kandinsky5.model
@@ -815,6 +817,16 @@ class HunyuanDiT(BaseModel):
         out['image_meta_size'] = comfy.conds.CONDRegular(torch.FloatTensor([[height, width, target_height, target_width, 0, 0]]))
         return out
 
+class SeedVR2(BaseModel):
+    def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
+        super().__init__(model_config, model_type, device, comfy.ldm.seedvr.model.NaDiT)
+
+    def extra_conds(self, **kwargs):
+        out = super().extra_conds(**kwargs)
+        condition = kwargs.get("condition", None)
+        if condition is not None:
+            out["condition"] = comfy.conds.CONDRegular(condition)
+        return out
+
 class PixArt(BaseModel):
     def __init__(self, model_config, model_type=ModelType.EPS, device=None):
         super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.pixart.pixartms.PixArtMS)
@@ -449,6 +449,28 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
 
         return dit_config
 
+    elif "{}blocks.36.mlp.all.proj_in_gate.weight".format(key_prefix) in state_dict_keys: # seedvr2 7b
+        dit_config = {}
+        dit_config["image_model"] = "seedvr2"
+        dit_config["vid_dim"] = 3072
+        dit_config["heads"] = 24
+        dit_config["num_layers"] = 36
+        dit_config["norm_eps"] = 1e-5
+        dit_config["qk_rope"] = True
+        dit_config["mlp_type"] = "normal"
+        return dit_config
+    elif "{}blocks.31.mlp.all.proj_in_gate.weight".format(key_prefix) in state_dict_keys: # seedvr2 3b
+        dit_config = {}
+        dit_config["image_model"] = "seedvr2"
+        dit_config["vid_dim"] = 2560
+        dit_config["heads"] = 20
+        dit_config["num_layers"] = 32
+        dit_config["norm_eps"] = 1.0e-05
+        dit_config["qk_rope"] = None
+        dit_config["mlp_type"] = "swiglu"
+        dit_config["vid_out_norm"] = True
+        return dit_config
+
     if '{}head.modulation'.format(key_prefix) in state_dict_keys: # Wan 2.1
         dit_config = {}
         dit_config["image_model"] = "wan2.1"
comfy/samplers.py (0 lines changed; Executable file → Normal file)
comfy/sd.py (18 lines changed)
@@ -16,6 +16,7 @@ import comfy.ldm.cosmos.vae
 import comfy.ldm.wan.vae
 import comfy.ldm.wan.vae2_2
 import comfy.ldm.hunyuan3d.vae
+import comfy.ldm.seedvr.vae
 import comfy.ldm.ace.vae.music_dcae_pipeline
 import comfy.ldm.hunyuan_video.vae
 import comfy.ldm.mmaudio.vae.autoencoder
@@ -312,7 +313,10 @@ class CLIP:
 class VAE:
     def __init__(self, sd=None, device=None, config=None, dtype=None, metadata=None):
         if 'decoder.up_blocks.0.resnets.0.norm1.weight' in sd.keys(): #diffusers format
-            sd = diffusers_convert.convert_vae_state_dict(sd)
+            if (metadata is not None and metadata["keep_diffusers_format"] == "true"):
+                pass
+            else:
+                sd = diffusers_convert.convert_vae_state_dict(sd)
 
         if model_management.is_amd():
             VAE_KL_MEM_RATIO = 2.73
@@ -379,6 +383,17 @@
             self.first_stage_model = StageC_coder()
             self.downscale_ratio = 32
             self.latent_channels = 16
+        elif "decoder.up_blocks.2.upsamplers.0.upscale_conv.weight" in sd: # seedvr2
+            self.first_stage_model = comfy.ldm.seedvr.vae.VideoAutoencoderKLWrapper()
+            self.memory_used_decode = lambda shape, dtype: (shape[1] * shape[2] * shape[3] * (4 * 8 * 8)) * model_management.dtype_size(dtype)
+            self.memory_used_encode = lambda shape, dtype: (max(shape[1], 5) * shape[2] * shape[3]) * model_management.dtype_size(dtype)
+            self.working_dtypes = [torch.bfloat16, torch.float32]
+            self.downscale_ratio = (lambda a: max(0, math.floor((a + 3) / 4)), 8, 8)
+            self.downscale_index_formula = (4, 8, 8)
+            self.upscale_ratio = (lambda a: max(0, a * 4 - 3), 8, 8)
+            self.upscale_index_formula = (4, 8, 8)
+            self.process_input = lambda image: image
+            self.crop_input = False
         elif "decoder.conv_in.weight" in sd:
             if sd['decoder.conv_in.weight'].shape[1] == 64:
                 ddconfig = {"block_out_channels": [128, 256, 512, 512, 1024, 1024], "in_channels": 3, "out_channels": 3, "num_res_blocks": 2, "ffactor_spatial": 32, "downsample_match_channel": True, "upsample_match_channel": True}
@@ -486,6 +501,7 @@
                 self.downscale_ratio = (lambda a: max(0, math.floor((a + 7) / 8)), 32, 32)
                 self.downscale_index_formula = (8, 32, 32)
                 self.working_dtypes = [torch.bfloat16, torch.float32]
 
         elif "decoder.conv_in.conv.weight" in sd and sd['decoder.conv_in.conv.weight'].shape[1] == 32:
             ddconfig = {"block_out_channels": [128, 256, 512, 1024, 1024], "in_channels": 3, "out_channels": 3, "num_res_blocks": 2, "ffactor_spatial": 16, "ffactor_temporal": 4, "downsample_match_channel": True, "upsample_match_channel": True}
             ddconfig['z_channels'] = sd["decoder.conv_in.conv.weight"].shape[1]
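The seedvr2 downscale/upscale tuples registered above encode a 4x temporal and 8x spatial compression with the usual 4k+1 frame convention. A small sketch (illustrative only, not part of the commit) of how those formulas map pixel-space video dimensions to latent dimensions and back:

```python
import math

def seedvr2_latent_dims(frames, height, width):
    # Mirrors downscale_ratio = (lambda a: max(0, math.floor((a + 3) / 4)), 8, 8)
    return (max(0, math.floor((frames + 3) / 4)), height // 8, width // 8)

def seedvr2_pixel_dims(latent_frames, latent_h, latent_w):
    # Mirrors upscale_ratio = (lambda a: max(0, a * 4 - 3), 8, 8)
    return (max(0, latent_frames * 4 - 3), latent_h * 8, latent_w * 8)

print(seedvr2_latent_dims(17, 512, 768))  # (5, 64, 96)
print(seedvr2_pixel_dims(5, 64, 96))      # (17, 512, 768)
```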
@@ -1303,6 +1303,25 @@ class Chroma(supported_models_base.BASE):
         t5_detect = comfy.text_encoders.sd3_clip.t5_xxl_detect(state_dict, "{}t5xxl.transformer.".format(pref))
         return supported_models_base.ClipTarget(comfy.text_encoders.pixart_t5.PixArtTokenizer, comfy.text_encoders.pixart_t5.pixart_te(**t5_detect))
 
+class SeedVR2(supported_models_base.BASE):
+    unet_config = {
+        "image_model": "seedvr2"
+    }
+    latent_format = comfy.latent_formats.SeedVR2
+
+    vae_key_prefix = ["vae."]
+    text_encoder_key_prefix = ["text_encoders."]
+    supported_inference_dtypes = [torch.bfloat16, torch.float32]
+    sampling_settings = {
+        "shift": 1.0,
+    }
+
+    def get_model(self, state_dict, prefix = "", device=None):
+        out = model_base.SeedVR2(self, device=device)
+        return out
+    def clip_target(self, state_dict={}):
+        return None
+
 class ChromaRadiance(Chroma):
     unet_config = {
         "image_model": "chroma_radiance",
@@ -1551,6 +1570,6 @@ class Kandinsky5Image(Kandinsky5):
         return supported_models_base.ClipTarget(comfy.text_encoders.kandinsky5.Kandinsky5TokenizerImage, comfy.text_encoders.kandinsky5.te(**hunyuan_detect))
 
 
-models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, FluxSchnell, GenmoMochi, LTXV, LTXAV, HunyuanVideo15_SR_Distilled, HunyuanVideo15, HunyuanImage21Refiner, HunyuanImage21, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, CosmosT2IPredict2, CosmosI2VPredict2, ZImage, Lumina2, WAN22_T2V, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, WAN21_Vace, WAN21_Camera, WAN22_Camera, WAN22_S2V, WAN21_HuMo, WAN22_Animate, Hunyuan3Dv2mini, Hunyuan3Dv2, Hunyuan3Dv2_1, HiDream, Chroma, ChromaRadiance, ACEStep, Omnigen2, QwenImage, Flux2, Kandinsky5Image, Kandinsky5]
+models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, FluxSchnell, GenmoMochi, LTXV, LTXAV, HunyuanVideo15_SR_Distilled, HunyuanVideo15, HunyuanImage21Refiner, HunyuanImage21, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, CosmosT2IPredict2, CosmosI2VPredict2, ZImage, Lumina2, WAN22_T2V, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, WAN21_Vace, WAN21_Camera, WAN22_Camera, WAN22_S2V, WAN21_HuMo, WAN22_Animate, Hunyuan3Dv2mini, Hunyuan3Dv2, Hunyuan3Dv2_1, HiDream, Chroma, ChromaRadiance, ACEStep, Omnigen2, QwenImage, Flux2, Kandinsky5Image, Kandinsky5, SeedVR2]
 
 models += [SVD_img2vid]
@@ -567,7 +567,7 @@ async def execute_lipsync(
     # Upload the audio file to Comfy API and get download URL
     if audio:
         audio_url = await upload_audio_to_comfyapi(
-            cls, audio, container_format="mp3", codec_name="libmp3lame", mime_type="audio/mpeg", filename="output.mp3"
+            cls, audio, container_format="mp3", codec_name="libmp3lame", mime_type="audio/mpeg"
        )
         logging.info("Uploaded audio to Comfy API. URL: %s", audio_url)
     else:
@@ -55,7 +55,7 @@ def image_tensor_pair_to_batch(image1: torch.Tensor, image2: torch.Tensor) -> to
 
 def tensor_to_bytesio(
     image: torch.Tensor,
     name: str | None = None,
     *,
     total_pixels: int = 2048 * 2048,
     mime_type: str = "image/png",
 ) -> BytesIO:
@@ -75,7 +75,7 @@ def tensor_to_bytesio(
 
     pil_image = tensor_to_pil(image, total_pixels=total_pixels)
     img_binary = pil_to_bytesio(pil_image, mime_type=mime_type)
-    img_binary.name = f"{name if name else uuid.uuid4()}.{mimetype_to_extension(mime_type)}"
+    img_binary.name = f"{uuid.uuid4()}.{mimetype_to_extension(mime_type)}"
     return img_binary
@@ -82,7 +82,6 @@ async def upload_audio_to_comfyapi(
     container_format: str = "mp4",
     codec_name: str = "aac",
     mime_type: str = "audio/mp4",
-    filename: str = "uploaded_audio.mp4",
 ) -> str:
     """
     Uploads a single audio input to ComfyUI API and returns its download URL.
@@ -92,7 +91,7 @@ async def upload_audio_to_comfyapi(
     waveform: torch.Tensor = audio["waveform"]
     audio_data_np = audio_tensor_to_contiguous_ndarray(waveform)
     audio_bytes_io = audio_ndarray_to_bytesio(audio_data_np, sample_rate, container_format, codec_name)
-    return await upload_file_to_comfyapi(cls, audio_bytes_io, filename, mime_type)
+    return await upload_file_to_comfyapi(cls, audio_bytes_io, f"{uuid.uuid4()}.{container_format}", mime_type)
 
 
 async def upload_video_to_comfyapi(
comfy_extras/nodes_seedvr.py (new file, 465 lines)
@@ -0,0 +1,465 @@
from typing_extensions import override
from comfy_api.latest import ComfyExtension, io
import torch
import math
from einops import rearrange

import gc
import comfy.model_management
from comfy.utils import ProgressBar

import torch.nn.functional as F
from torchvision.transforms import functional as TVF
from torchvision.transforms import Lambda, Normalize
from torchvision.transforms.functional import InterpolationMode


@torch.inference_mode()
def tiled_vae(x, vae_model, tile_size=(512, 512), tile_overlap=(64, 64), temporal_size=16, encode=True):

    gc.collect()
    torch.cuda.empty_cache()

    x = x.to(next(vae_model.parameters()).dtype)
    if x.ndim != 5:
        x = x.unsqueeze(2)

    b, c, d, h, w = x.shape

    sf_s = getattr(vae_model, "spatial_downsample_factor", 8)
    sf_t = getattr(vae_model, "temporal_downsample_factor", 4)

    if encode:
        ti_h, ti_w = tile_size
        ov_h, ov_w = tile_overlap
        target_d = (d + sf_t - 1) // sf_t
        target_h = (h + sf_s - 1) // sf_s
        target_w = (w + sf_s - 1) // sf_s
    else:
        ti_h = max(1, tile_size[0] // sf_s)
        ti_w = max(1, tile_size[1] // sf_s)
        ov_h = max(0, tile_overlap[0] // sf_s)
        ov_w = max(0, tile_overlap[1] // sf_s)

        target_d = d * sf_t
        target_h = h * sf_s
        target_w = w * sf_s

    stride_h = max(1, ti_h - ov_h)
    stride_w = max(1, ti_w - ov_w)

    storage_device = vae_model.device
    result = None
    count = None

    def run_temporal_chunks(spatial_tile):
        chunk_results = []
        t_dim_size = spatial_tile.shape[2]

        if encode:
            input_chunk = temporal_size
        else:
            input_chunk = max(1, temporal_size // sf_t)
        for i in range(0, t_dim_size, input_chunk):
            t_chunk = spatial_tile[:, :, i : i + input_chunk, :, :]
            current_valid_len = t_chunk.shape[2]

            pad_amount = 0
            if current_valid_len < input_chunk:
                pad_amount = input_chunk - current_valid_len

                last_frame = t_chunk[:, :, -1:, :, :]
                padding = last_frame.repeat(1, 1, pad_amount, 1, 1)

                t_chunk = torch.cat([t_chunk, padding], dim=2)
            t_chunk = t_chunk.contiguous()

            if encode:
                out = vae_model.encode(t_chunk)[0]
            else:
                out = vae_model.decode_(t_chunk)

            if isinstance(out, (tuple, list)):
                out = out[0]
            if out.ndim == 4:
                out = out.unsqueeze(2)

            if pad_amount > 0:
                if encode:
                    expected_valid_out = (current_valid_len + sf_t - 1) // sf_t
                    out = out[:, :, :expected_valid_out, :, :]
                else:
                    expected_valid_out = current_valid_len * sf_t
                    out = out[:, :, :expected_valid_out, :, :]

            chunk_results.append(out.to(storage_device))

        return torch.cat(chunk_results, dim=2)

    ramp_cache = {}
    def get_ramp(steps):
        if steps not in ramp_cache:
            t = torch.linspace(0, 1, steps=steps, device=storage_device, dtype=torch.float32)
            ramp_cache[steps] = 0.5 - 0.5 * torch.cos(t * torch.pi)
        return ramp_cache[steps]

    total_tiles = len(range(0, h, stride_h)) * len(range(0, w, stride_w))
    bar = ProgressBar(total_tiles)

    for y_idx in range(0, h, stride_h):
        y_end = min(y_idx + ti_h, h)

        for x_idx in range(0, w, stride_w):
            x_end = min(x_idx + ti_w, w)

            tile_x = x[:, :, :, y_idx:y_end, x_idx:x_end]

            # Run VAE
            tile_out = run_temporal_chunks(tile_x)

            if result is None:
                b_out, c_out = tile_out.shape[0], tile_out.shape[1]
                result = torch.zeros((b_out, c_out, target_d, target_h, target_w), device=storage_device, dtype=torch.float32)
                count = torch.zeros((1, 1, 1, target_h, target_w), device=storage_device, dtype=torch.float32)

            if encode:
                ys, ye = y_idx // sf_s, (y_idx // sf_s) + tile_out.shape[3]
                xs, xe = x_idx // sf_s, (x_idx // sf_s) + tile_out.shape[4]
                cur_ov_h = max(0, min(ov_h // sf_s, tile_out.shape[3] // 2))
                cur_ov_w = max(0, min(ov_w // sf_s, tile_out.shape[4] // 2))
            else:
                ys, ye = y_idx * sf_s, (y_idx * sf_s) + tile_out.shape[3]
                xs, xe = x_idx * sf_s, (x_idx * sf_s) + tile_out.shape[4]
                cur_ov_h = max(0, min(ov_h, tile_out.shape[3] // 2))
                cur_ov_w = max(0, min(ov_w, tile_out.shape[4] // 2))

            w_h = torch.ones((tile_out.shape[3],), device=storage_device)
            w_w = torch.ones((tile_out.shape[4],), device=storage_device)

            if cur_ov_h > 0:
                r = get_ramp(cur_ov_h)
                if y_idx > 0:
                    w_h[:cur_ov_h] = r
                if y_end < h:
                    w_h[-cur_ov_h:] = 1.0 - r

            if cur_ov_w > 0:
                r = get_ramp(cur_ov_w)
                if x_idx > 0:
                    w_w[:cur_ov_w] = r
                if x_end < w:
                    w_w[-cur_ov_w:] = 1.0 - r

            final_weight = w_h.view(1,1,1,-1,1) * w_w.view(1,1,1,1,-1)

            valid_d = min(tile_out.shape[2], result.shape[2])
            tile_out = tile_out[:, :, :valid_d, :, :]

            tile_out.mul_(final_weight)

            result[:, :, :valid_d, ys:ye, xs:xe] += tile_out
            count[:, :, :, ys:ye, xs:xe] += final_weight

            del tile_out, final_weight, tile_x, w_h, w_w
            bar.update(1)

    result.div_(count.clamp(min=1e-6))

    if result.device != x.device:
        result = result.to(x.device).to(x.dtype)

    if x.shape[2] == 1 and sf_t == 1:
        result = result.squeeze(2)

    return result
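An aside on the blending above (illustration only, not part of nodes_seedvr.py): tiled_vae weights overlapping tiles with a raised-cosine ramp so seams fade smoothly instead of hard-cutting. A tiny sketch of the blend weights for an 8-pixel overlap:

```python
import torch

overlap = 8
t = torch.linspace(0, 1, steps=overlap)
ramp = 0.5 - 0.5 * torch.cos(t * torch.pi)   # same formula as get_ramp() above

left_tile_weight = 1.0 - ramp   # fades out toward the seam
right_tile_weight = ramp        # fades in from the seam
# The two weights sum to 1 across the overlap, so result.div_(count) stays well conditioned.
print(left_tile_weight + right_tile_weight)  # all ones
```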
def pad_video_temporal(videos: torch.Tensor, count: int = 0, temporal_dim: int = 1, prepend: bool = False):
    t = videos.size(temporal_dim)

    if count == 0 and not prepend:
        if t % 4 == 1:
            return videos
        count = ((t - 1) // 4 + 1) * 4 + 1 - t

    if count <= 0:
        return videos

    def select(start, end):
        return videos[start:end] if temporal_dim == 0 else videos[:, start:end]

    if count >= t:
        repeat_count = count - t + 1
        last = select(-1, None)

        if temporal_dim == 0:
            repeated = last.repeat(repeat_count, 1, 1, 1)
            reversed_frames = select(1, None).flip(temporal_dim) if t > 1 else last[:0]
        else:
            repeated = last.expand(-1, repeat_count, -1, -1).contiguous()
            reversed_frames = select(1, None).flip(temporal_dim) if t > 1 else last[:, :0]

        return torch.cat([repeated, reversed_frames, videos] if prepend else
                         [videos, reversed_frames, repeated], dim=temporal_dim)

    if prepend:
        reversed_frames = select(1, count+1).flip(temporal_dim)
    else:
        reversed_frames = select(-count-1, -1).flip(temporal_dim)

    return torch.cat([reversed_frames, videos] if prepend else
                     [videos, reversed_frames], dim=temporal_dim)


def clear_vae_memory(vae_model):
    for module in vae_model.modules():
        if hasattr(module, "memory"):
            module.memory = None
    gc.collect()
    torch.cuda.empty_cache()


def expand_dims(tensor, ndim):
    shape = tensor.shape + (1,) * (ndim - tensor.ndim)
    return tensor.reshape(shape)


def get_conditions(latent, latent_blur):
    t, h, w, c = latent.shape
    cond = torch.ones([t, h, w, c + 1], device=latent.device, dtype=latent.dtype)
    cond[:, ..., :-1] = latent_blur[:]
    cond[:, ..., -1:] = 1.0
    return cond


def timestep_transform(timesteps, latents_shapes):
    vt = 4
    vs = 8
    frames = (latents_shapes[:, 0] - 1) * vt + 1
    heights = latents_shapes[:, 1] * vs
    widths = latents_shapes[:, 2] * vs

    # Compute shift factor.
    def get_lin_function(x1, y1, x2, y2):
        m = (y2 - y1) / (x2 - x1)
        b = y1 - m * x1
        return lambda x: m * x + b

    img_shift_fn = get_lin_function(x1=256 * 256, y1=1.0, x2=1024 * 1024, y2=3.2)
    vid_shift_fn = get_lin_function(x1=256 * 256 * 37, y1=1.0, x2=1280 * 720 * 145, y2=5.0)
    shift = torch.where(
        frames > 1,
        vid_shift_fn(heights * widths * frames),
        img_shift_fn(heights * widths),
    ).to(timesteps.device)

    # Shift timesteps.
    T = 1000.0
    timesteps = timesteps / T
    timesteps = shift * timesteps / (1 + (shift - 1) * timesteps)
    timesteps = timesteps * T
    return timesteps


def inter(x_0, x_T, t):
    t = expand_dims(t, x_0.ndim)
    T = 1000.0
    B = lambda t: t / T
    A = lambda t: 1 - (t / T)
    return A(t) * x_0 + B(t) * x_T


def area_resize(image, max_area):
    height, width = image.shape[-2:]
    scale = math.sqrt(max_area / (height * width))

    resized_height, resized_width = round(height * scale), round(width * scale)

    return TVF.resize(
        image,
        size=(resized_height, resized_width),
        interpolation=InterpolationMode.BICUBIC,
    )


def div_pad(image, factor):
    height_factor, width_factor = factor
    height, width = image.shape[-2:]

    pad_height = (height_factor - (height % height_factor)) % height_factor
    pad_width = (width_factor - (width % width_factor)) % width_factor

    if pad_height == 0 and pad_width == 0:
        return image

    if isinstance(image, torch.Tensor):
        padding = (0, pad_width, 0, pad_height)
        image = torch.nn.functional.pad(image, padding, mode='constant', value=0.0)

    return image


def cut_videos(videos):
    t = videos.size(1)
    if t == 1:
        return videos
    if t <= 4:
        padding = [videos[:, -1].unsqueeze(1)] * (4 - t + 1)
        padding = torch.cat(padding, dim=1)
        videos = torch.cat([videos, padding], dim=1)
        return videos
    if (t - 1) % (4) == 0:
        return videos
    else:
        padding = [videos[:, -1].unsqueeze(1)] * (
            4 - ((t - 1) % (4))
        )
        padding = torch.cat(padding, dim=1)
        videos = torch.cat([videos, padding], dim=1)
        assert (videos.size(1) - 1) % (4) == 0
        return videos


def side_resize(image, size):
    antialias = not (isinstance(image, torch.Tensor) and image.device.type == 'mps')
    resized = TVF.resize(image, size, InterpolationMode.BICUBIC, antialias=antialias)
    return resized


class SeedVR2InputProcessing(io.ComfyNode):
    @classmethod
    def define_schema(cls):
        return io.Schema(
            node_id = "SeedVR2InputProcessing",
            category="image/video",
            inputs = [
                io.Image.Input("images"),
                io.Vae.Input("vae"),
                io.Int.Input("resolution", default = 1280, min = 120), # just non-zero value
                io.Int.Input("spatial_tile_size", default = 512, min = 1),
                io.Int.Input("spatial_overlap", default = 64, min = 1),
                io.Int.Input("temporal_tile_size", default=5, min=1, max=16384, step=4),
                io.Boolean.Input("enable_tiling", default=False),
            ],
            outputs = [
                io.Latent.Output("vae_conditioning")
            ]
        )

    @classmethod
    def execute(cls, images, vae, resolution, spatial_tile_size, temporal_tile_size, spatial_overlap, enable_tiling):

        comfy.model_management.load_models_gpu([vae.patcher])
        vae_model = vae.first_stage_model
        scale = 0.9152
        shift = 0
        if images.dim() != 5: # add the t dim
            images = images.unsqueeze(0)
        images = images.permute(0, 1, 4, 2, 3)

        b, t, c, h, w = images.shape
        images = images.reshape(b * t, c, h, w)

        clip = Lambda(lambda x: torch.clamp(x, 0.0, 1.0))
        normalize = Normalize(0.5, 0.5)
        images = side_resize(images, resolution)

        images = clip(images)
        o_h, o_w = images.shape[-2:]
        images = div_pad(images, (16, 16))
        images = normalize(images)
        _, _, new_h, new_w = images.shape

        images = images.reshape(b, t, c, new_h, new_w)
        images = cut_videos(images)

        images = rearrange(images, "b t c h w -> b c t h w")

        # in case the user picks a non-compatible number for tiling
        def make_divisible(val, divisor):
            return max(divisor, round(val / divisor) * divisor)

        spatial_tile_size = make_divisible(spatial_tile_size, 32)
        spatial_overlap = make_divisible(spatial_overlap, 32)

        if spatial_overlap >= spatial_tile_size:
            spatial_overlap = max(0, spatial_tile_size - 8)

        args = {"tile_size": (spatial_tile_size, spatial_tile_size), "tile_overlap": (spatial_overlap, spatial_overlap),
                "temporal_size": temporal_tile_size}
        if enable_tiling:
            latent = tiled_vae(images, vae_model, encode=True, **args)
        else:
            latent = vae_model.encode(images, orig_dims = [o_h, o_w])[0]

        clear_vae_memory(vae_model)
        #images = images.to(offload_device)
        #vae_model = vae_model.to(offload_device)

        vae_model.img_dims = [o_h, o_w]
        args["enable_tiling"] = enable_tiling
        vae_model.tiled_args = args
        vae_model.original_image_video = images

        latent = latent.unsqueeze(2) if latent.ndim == 4 else latent
        latent = rearrange(latent, "b c ... -> b ... c")

        latent = (latent - shift) * scale

        return io.NodeOutput({"samples": latent})


class SeedVR2Conditioning(io.ComfyNode):
    @classmethod
    def define_schema(cls):
        return io.Schema(
            node_id="SeedVR2Conditioning",
            category="image/video",
            inputs=[
                io.Latent.Input("vae_conditioning"),
                io.Model.Input("model"),
            ],
            outputs=[io.Conditioning.Output(display_name = "positive"),
                     io.Conditioning.Output(display_name = "negative"),
                     io.Latent.Output(display_name = "latent")],
        )

    @classmethod
    def execute(cls, vae_conditioning, model) -> io.NodeOutput:

        vae_conditioning = vae_conditioning["samples"]
        device = vae_conditioning.device
        model = model.model.diffusion_model
        pos_cond = model.positive_conditioning
        neg_cond = model.negative_conditioning

        noises = torch.randn_like(vae_conditioning).to(device)
        aug_noises = torch.randn_like(vae_conditioning).to(device)
        aug_noises = noises * 0.1 + aug_noises * 0.05
        cond_noise_scale = 0.0
        t = (
            torch.tensor([1000.0])
            * cond_noise_scale
        ).to(device)
        shape = torch.tensor(vae_conditioning.shape[1:]).to(device)[None] # avoid batch dim
        t = timestep_transform(t, shape)
        cond = inter(vae_conditioning, aug_noises, t)
        condition = torch.stack([get_conditions(noise, c) for noise, c in zip(noises, cond)])
        condition = condition.movedim(-1, 1)
        noises = noises.movedim(-1, 1)

        pos_shape = pos_cond.shape[0]
        neg_shape = neg_cond.shape[0]
        diff = abs(pos_shape - neg_shape)
        if pos_shape > neg_shape:
            neg_cond = F.pad(neg_cond, (0, 0, 0, diff))
        else:
            pos_cond = F.pad(pos_cond, (0, 0, 0, diff))

        noises = rearrange(noises, "b c t h w -> b (c t) h w")
        condition = rearrange(condition, "b c t h w -> b (c t) h w")

        negative = [[neg_cond.unsqueeze(0), {"condition": condition}]]
        positive = [[pos_cond.unsqueeze(0), {"condition": condition}]]

        return io.NodeOutput(positive, negative, {"samples": noises})


class SeedVRExtension(ComfyExtension):
    @override
    async def get_node_list(self) -> list[type[io.ComfyNode]]:
        return [
            SeedVR2Conditioning,
            SeedVR2InputProcessing
        ]


async def comfy_entrypoint() -> SeedVRExtension:
    return SeedVRExtension()
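Both cut_videos and pad_video_temporal above pad clips so the frame count satisfies the 4k+1 pattern the SeedVR2 VAE expects, matching its 4x temporal compression. A quick standalone illustration of the padded lengths (not part of the node file):

```python
def padded_frame_count(t: int) -> int:
    # Same arithmetic as pad_video_temporal: round t up to the next 4k + 1.
    if t % 4 == 1:
        return t
    return ((t - 1) // 4 + 1) * 4 + 1

for t in (1, 2, 5, 6, 16, 17):
    print(t, "->", padded_frame_count(t))
# 1 -> 1, 2 -> 5, 5 -> 5, 6 -> 9, 16 -> 17, 17 -> 17
```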