Merge branch 'comfyanonymous:master' into master

patientx committed 46a997fb23 via GitHub on 2025-05-29 10:56:01 +03:00
5 changed files with 81 additions and 26 deletions


@@ -539,13 +539,20 @@ class WanModel(torch.nn.Module):
         x = self.unpatchify(x, grid_sizes)
         return x

-    def forward(self, x, timestep, context, clip_fea=None, transformer_options={}, **kwargs):
+    def forward(self, x, timestep, context, clip_fea=None, time_dim_concat=None, transformer_options={}, **kwargs):
         bs, c, t, h, w = x.shape
         x = comfy.ldm.common_dit.pad_to_patch_size(x, self.patch_size)

         patch_size = self.patch_size
         t_len = ((t + (patch_size[0] // 2)) // patch_size[0])
         h_len = ((h + (patch_size[1] // 2)) // patch_size[1])
         w_len = ((w + (patch_size[2] // 2)) // patch_size[2])
+
+        if time_dim_concat is not None:
+            time_dim_concat = comfy.ldm.common_dit.pad_to_patch_size(time_dim_concat, self.patch_size)
+            x = torch.cat([x, time_dim_concat], dim=2)
+            t_len = ((x.shape[2] + (patch_size[0] // 2)) // patch_size[0])
+
         img_ids = torch.zeros((t_len, h_len, w_len, 3), device=x.device, dtype=x.dtype)
         img_ids[:, :, :, 0] = img_ids[:, :, :, 0] + torch.linspace(0, t_len - 1, steps=t_len, device=x.device, dtype=x.dtype).reshape(-1, 1, 1)
         img_ids[:, :, :, 1] = img_ids[:, :, :, 1] + torch.linspace(0, h_len - 1, steps=h_len, device=x.device, dtype=x.dtype).reshape(1, -1, 1)
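The appended time_dim_concat frames change the temporal token count, so t_len is recomputed from the concatenated tensor. A minimal standalone sketch of that arithmetic, with assumed values (the patch size and frame counts below are illustrative, not taken from this diff):

    import torch

    patch_size = (1, 2, 2)                              # assumed (t, h, w) patch size
    x = torch.zeros(1, 16, 21, 60, 104)                 # (bs, c, t, h, w), 21 latent frames
    time_dim_concat = torch.zeros(1, 16, 21, 60, 104)   # reference latents to append

    t_len = (x.shape[2] + patch_size[0] // 2) // patch_size[0]   # 21 temporal tokens
    x = torch.cat([x, time_dim_concat], dim=2)                   # concatenate along time
    t_len = (x.shape[2] + patch_size[0] // 2) // patch_size[0]   # recomputed: 42

    print(x.shape[2], t_len)  # 42 42 -> img_ids positions now cover the appended frames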


@@ -283,8 +283,9 @@ def model_lora_keys_unet(model, key_map={}):
         for k in sdk:
             if k.startswith("diffusion_model."):
                 if k.endswith(".weight"):
-                    key_lora = k[len("diffusion_model."):-len(".weight")].replace(".", "_")
-                    key_map["lycoris_{}".format(key_lora)] = k #SimpleTuner lycoris format
+                    key_lora = k[len("diffusion_model."):-len(".weight")]
+                    key_map["lycoris_{}".format(key_lora.replace(".", "_"))] = k #SimpleTuner lycoris format
+                    key_map["transformer.{}".format(key_lora)] = k #SimpleTuner regular format

     if isinstance(model, comfy.model_base.ACEStep):
         for k in sdk:
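For illustration, a standalone sketch of the two SimpleTuner key formats produced for one hypothetical state-dict key (the key name below is made up):

    key_map = {}
    k = "diffusion_model.blocks.0.self_attn.q.weight"   # hypothetical key

    key_lora = k[len("diffusion_model."):-len(".weight")]          # "blocks.0.self_attn.q"
    key_map["lycoris_{}".format(key_lora.replace(".", "_"))] = k   # lycoris format key
    key_map["transformer.{}".format(key_lora)] = k                 # regular format key

    print(list(key_map))  # ['lycoris_blocks_0_self_attn_q', 'transformer.blocks.0.self_attn.q']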


@@ -1057,6 +1057,11 @@ class WAN21(BaseModel):
         clip_vision_output = kwargs.get("clip_vision_output", None)
         if clip_vision_output is not None:
             out['clip_fea'] = comfy.conds.CONDRegular(clip_vision_output.penultimate_hidden_states)
+
+        time_dim_concat = kwargs.get("time_dim_concat", None)
+        if time_dim_concat is not None:
+            out['time_dim_concat'] = comfy.conds.CONDRegular(self.process_latent_in(time_dim_concat))
+
         return out


@@ -6,40 +6,42 @@ Pika API docs: https://pika-827374fb.mintlify.app/api-reference
 from __future__ import annotations

 import io
-from typing import Optional, TypeVar
 import logging
-import torch
+from typing import Optional, TypeVar
+
 import numpy as np
+import torch

+from comfy.comfy_types.node_typing import IO, ComfyNodeABC, InputTypeOptions
+from comfy_api.input_impl import VideoFromFile
+from comfy_api.input_impl.video_types import VideoCodec, VideoContainer, VideoInput
+from comfy_api_nodes.apinode_utils import (
+    download_url_to_video_output,
+    tensor_to_bytesio,
+)
 from comfy_api_nodes.apis import (
-    PikaBodyGenerate22T2vGenerate22T2vPost,
-    PikaGenerateResponse,
-    PikaBodyGenerate22I2vGenerate22I2vPost,
-    PikaVideoResponse,
-    PikaBodyGenerate22C2vGenerate22PikascenesPost,
     IngredientsMode,
-    PikaDurationEnum,
-    PikaResolutionEnum,
-    PikaBodyGeneratePikaffectsGeneratePikaffectsPost,
-    PikaBodyGeneratePikadditionsGeneratePikadditionsPost,
-    PikaBodyGeneratePikaswapsGeneratePikaswapsPost,
+    PikaBodyGenerate22C2vGenerate22PikascenesPost,
+    PikaBodyGenerate22I2vGenerate22I2vPost,
     PikaBodyGenerate22KeyframeGenerate22PikaframesPost,
+    PikaBodyGenerate22T2vGenerate22T2vPost,
+    PikaBodyGeneratePikadditionsGeneratePikadditionsPost,
+    PikaBodyGeneratePikaffectsGeneratePikaffectsPost,
+    PikaBodyGeneratePikaswapsGeneratePikaswapsPost,
+    PikaDurationEnum,
     Pikaffect,
+    PikaGenerateResponse,
+    PikaResolutionEnum,
+    PikaVideoResponse,
 )
 from comfy_api_nodes.apis.client import (
     ApiEndpoint,
-    HttpMethod,
-    SynchronousOperation,
-    PollingOperation,
     EmptyRequest,
-)
-from comfy_api_nodes.apinode_utils import (
-    tensor_to_bytesio,
-    download_url_to_video_output,
+    HttpMethod,
+    PollingOperation,
+    SynchronousOperation,
 )
 from comfy_api_nodes.mapper_utils import model_field_to_node_input
-from comfy_api.input_impl.video_types import VideoInput, VideoContainer, VideoCodec
-from comfy_api.input_impl import VideoFromFile
-from comfy.comfy_types.node_typing import IO, ComfyNodeABC, InputTypeOptions

 R = TypeVar("R")
@@ -204,6 +206,7 @@ class PikaImageToVideoV2_2(PikaNodeBase):
             "hidden": {
                 "auth_token": "AUTH_TOKEN_COMFY_ORG",
                 "comfy_api_key": "API_KEY_COMFY_ORG",
+                "unique_id": "UNIQUE_ID",
             },
         }
@@ -457,7 +460,7 @@ class PikAdditionsNode(PikaNodeBase):
             },
         }

-    DESCRIPTION = "Add any object or image into your video. Upload a video and specify what youd like to add to create a seamlessly integrated result."
+    DESCRIPTION = "Add any object or image into your video. Upload a video and specify what you'd like to add to create a seamlessly integrated result."

     def api_call(
         self,


@@ -345,6 +345,44 @@ class WanCameraImageToVideo:
         out_latent["samples"] = latent
         return (positive, negative, out_latent)

+class WanPhantomSubjectToVideo:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": {"positive": ("CONDITIONING", ),
+                             "negative": ("CONDITIONING", ),
+                             "vae": ("VAE", ),
+                             "width": ("INT", {"default": 832, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}),
+                             "height": ("INT", {"default": 480, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}),
+                             "length": ("INT", {"default": 81, "min": 1, "max": nodes.MAX_RESOLUTION, "step": 4}),
+                             "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
+                },
+                "optional": {"images": ("IMAGE", ),
+                }}
+
+    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "CONDITIONING", "LATENT")
+    RETURN_NAMES = ("positive", "negative_text", "negative_img_text", "latent")
+    FUNCTION = "encode"
+
+    CATEGORY = "conditioning/video_models"
+
+    def encode(self, positive, negative, vae, width, height, length, batch_size, images):
+        latent = torch.zeros([batch_size, 16, ((length - 1) // 4) + 1, height // 8, width // 8], device=comfy.model_management.intermediate_device())
+        cond2 = negative
+        if images is not None:
+            images = comfy.utils.common_upscale(images[:length].movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1)
+            latent_images = []
+            for i in images:
+                latent_images += [vae.encode(i.unsqueeze(0)[:, :, :, :3])]
+            concat_latent_image = torch.cat(latent_images, dim=2)
+
+            positive = node_helpers.conditioning_set_values(positive, {"time_dim_concat": concat_latent_image})
+            cond2 = node_helpers.conditioning_set_values(negative, {"time_dim_concat": concat_latent_image})
+            negative = node_helpers.conditioning_set_values(negative, {"time_dim_concat": comfy.latent_formats.Wan21().process_out(torch.zeros_like(concat_latent_image))})
+
+        out_latent = {}
+        out_latent["samples"] = latent
+        return (positive, cond2, negative, out_latent)
+
 NODE_CLASS_MAPPINGS = {
     "WanImageToVideo": WanImageToVideo,
     "WanFunControlToVideo": WanFunControlToVideo,
@@ -353,4 +391,5 @@ NODE_CLASS_MAPPINGS = {
     "WanVaceToVideo": WanVaceToVideo,
     "TrimVideoLatent": TrimVideoLatent,
     "WanCameraImageToVideo": WanCameraImageToVideo,
+    "WanPhantomSubjectToVideo": WanPhantomSubjectToVideo,
 }
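For reference, a standalone sketch of the latent shape the new node allocates at its default settings (plain arithmetic only, not repository code):

    width, height, length, batch_size = 832, 480, 81, 1   # node defaults

    t = ((length - 1) // 4) + 1   # 81 video frames -> 21 latent frames
    h = height // 8               # 480 -> 60
    w = width // 8                # 832 -> 104

    print((batch_size, 16, t, h, w))  # (1, 16, 21, 60, 104)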