Merge branch 'comfyanonymous:master' into master

patientx committed 46a997fb23 via GitHub on 2025-05-29 10:56:01 +03:00
5 changed files with 81 additions and 26 deletions


@@ -539,13 +539,20 @@ class WanModel(torch.nn.Module):
         x = self.unpatchify(x, grid_sizes)
         return x

-    def forward(self, x, timestep, context, clip_fea=None, transformer_options={}, **kwargs):
+    def forward(self, x, timestep, context, clip_fea=None, time_dim_concat=None, transformer_options={}, **kwargs):
         bs, c, t, h, w = x.shape
         x = comfy.ldm.common_dit.pad_to_patch_size(x, self.patch_size)

         patch_size = self.patch_size
         t_len = ((t + (patch_size[0] // 2)) // patch_size[0])
         h_len = ((h + (patch_size[1] // 2)) // patch_size[1])
         w_len = ((w + (patch_size[2] // 2)) // patch_size[2])
+
+        if time_dim_concat is not None:
+            time_dim_concat = comfy.ldm.common_dit.pad_to_patch_size(time_dim_concat, self.patch_size)
+            x = torch.cat([x, time_dim_concat], dim=2)
+            t_len = ((x.shape[2] + (patch_size[0] // 2)) // patch_size[0])
+
         img_ids = torch.zeros((t_len, h_len, w_len, 3), device=x.device, dtype=x.dtype)
         img_ids[:, :, :, 0] = img_ids[:, :, :, 0] + torch.linspace(0, t_len - 1, steps=t_len, device=x.device, dtype=x.dtype).reshape(-1, 1, 1)
         img_ids[:, :, :, 1] = img_ids[:, :, :, 1] + torch.linspace(0, h_len - 1, steps=h_len, device=x.device, dtype=x.dtype).reshape(1, -1, 1)
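The appended time_dim_concat frames change the temporal token count, so t_len is recomputed from the concatenated tensor. A minimal standalone sketch of that arithmetic, with assumed values (the patch size and frame counts below are illustrative, not taken from this diff):

    import torch

    patch_size = (1, 2, 2)                              # assumed (t, h, w) patch size
    x = torch.zeros(1, 16, 21, 60, 104)                 # (bs, c, t, h, w), 21 latent frames
    time_dim_concat = torch.zeros(1, 16, 21, 60, 104)   # reference latents to append

    t_len = (x.shape[2] + patch_size[0] // 2) // patch_size[0]   # 21 temporal tokens
    x = torch.cat([x, time_dim_concat], dim=2)                   # concatenate along time
    t_len = (x.shape[2] + patch_size[0] // 2) // patch_size[0]   # recomputed: 42

    print(x.shape[2], t_len)  # 42 42 -> img_ids positions now cover the appended frames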


@@ -283,8 +283,9 @@ def model_lora_keys_unet(model, key_map={}):
         for k in sdk:
             if k.startswith("diffusion_model."):
                 if k.endswith(".weight"):
-                    key_lora = k[len("diffusion_model."):-len(".weight")].replace(".", "_")
-                    key_map["lycoris_{}".format(key_lora)] = k #SimpleTuner lycoris format
+                    key_lora = k[len("diffusion_model."):-len(".weight")]
+                    key_map["lycoris_{}".format(key_lora.replace(".", "_"))] = k #SimpleTuner lycoris format
+                    key_map["transformer.{}".format(key_lora)] = k #SimpleTuner regular format

     if isinstance(model, comfy.model_base.ACEStep):
         for k in sdk:
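For illustration, a standalone sketch of the two SimpleTuner key formats produced for one hypothetical state-dict key (the key name below is made up):

    key_map = {}
    k = "diffusion_model.blocks.0.self_attn.q.weight"   # hypothetical key

    key_lora = k[len("diffusion_model."):-len(".weight")]          # "blocks.0.self_attn.q"
    key_map["lycoris_{}".format(key_lora.replace(".", "_"))] = k   # lycoris format key
    key_map["transformer.{}".format(key_lora)] = k                 # regular format key

    print(list(key_map))  # ['lycoris_blocks_0_self_attn_q', 'transformer.blocks.0.self_attn.q']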


@@ -1057,6 +1057,11 @@ class WAN21(BaseModel):
         clip_vision_output = kwargs.get("clip_vision_output", None)
         if clip_vision_output is not None:
             out['clip_fea'] = comfy.conds.CONDRegular(clip_vision_output.penultimate_hidden_states)
+
+        time_dim_concat = kwargs.get("time_dim_concat", None)
+        if time_dim_concat is not None:
+            out['time_dim_concat'] = comfy.conds.CONDRegular(self.process_latent_in(time_dim_concat))
+
         return out


@@ -6,40 +6,42 @@ Pika API docs: https://pika-827374fb.mintlify.app/api-reference
 from __future__ import annotations

 import io
-from typing import Optional, TypeVar
 import logging
-import torch
+from typing import Optional, TypeVar
+
 import numpy as np
+import torch

+from comfy.comfy_types.node_typing import IO, ComfyNodeABC, InputTypeOptions
+from comfy_api.input_impl import VideoFromFile
+from comfy_api.input_impl.video_types import VideoCodec, VideoContainer, VideoInput
+from comfy_api_nodes.apinode_utils import (
+    download_url_to_video_output,
+    tensor_to_bytesio,
+)
 from comfy_api_nodes.apis import (
-    PikaBodyGenerate22T2vGenerate22T2vPost,
-    PikaGenerateResponse,
-    PikaBodyGenerate22I2vGenerate22I2vPost,
-    PikaVideoResponse,
-    PikaBodyGenerate22C2vGenerate22PikascenesPost,
     IngredientsMode,
-    PikaDurationEnum,
-    PikaResolutionEnum,
-    PikaBodyGeneratePikaffectsGeneratePikaffectsPost,
-    PikaBodyGeneratePikadditionsGeneratePikadditionsPost,
-    PikaBodyGeneratePikaswapsGeneratePikaswapsPost,
+    PikaBodyGenerate22C2vGenerate22PikascenesPost,
+    PikaBodyGenerate22I2vGenerate22I2vPost,
     PikaBodyGenerate22KeyframeGenerate22PikaframesPost,
+    PikaBodyGenerate22T2vGenerate22T2vPost,
+    PikaBodyGeneratePikadditionsGeneratePikadditionsPost,
+    PikaBodyGeneratePikaffectsGeneratePikaffectsPost,
+    PikaBodyGeneratePikaswapsGeneratePikaswapsPost,
+    PikaDurationEnum,
     Pikaffect,
+    PikaGenerateResponse,
+    PikaResolutionEnum,
+    PikaVideoResponse,
 )
 from comfy_api_nodes.apis.client import (
     ApiEndpoint,
-    HttpMethod,
-    SynchronousOperation,
-    PollingOperation,
     EmptyRequest,
-)
-from comfy_api_nodes.apinode_utils import (
-    tensor_to_bytesio,
-    download_url_to_video_output,
+    HttpMethod,
+    PollingOperation,
+    SynchronousOperation,
 )
 from comfy_api_nodes.mapper_utils import model_field_to_node_input
-from comfy_api.input_impl.video_types import VideoInput, VideoContainer, VideoCodec
-from comfy_api.input_impl import VideoFromFile
-from comfy.comfy_types.node_typing import IO, ComfyNodeABC, InputTypeOptions

 R = TypeVar("R")
@@ -204,6 +206,7 @@ class PikaImageToVideoV2_2(PikaNodeBase):
             "hidden": {
                 "auth_token": "AUTH_TOKEN_COMFY_ORG",
                 "comfy_api_key": "API_KEY_COMFY_ORG",
+                "unique_id": "UNIQUE_ID",
             },
         }
@@ -457,7 +460,7 @@ class PikAdditionsNode(PikaNodeBase):
             },
         }

-    DESCRIPTION = "Add any object or image into your video. Upload a video and specify what youd like to add to create a seamlessly integrated result."
+    DESCRIPTION = "Add any object or image into your video. Upload a video and specify what you'd like to add to create a seamlessly integrated result."

     def api_call(
         self,


@@ -345,6 +345,44 @@ class WanCameraImageToVideo:
         out_latent["samples"] = latent
         return (positive, negative, out_latent)

+class WanPhantomSubjectToVideo:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": {"positive": ("CONDITIONING", ),
+                             "negative": ("CONDITIONING", ),
+                             "vae": ("VAE", ),
+                             "width": ("INT", {"default": 832, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}),
+                             "height": ("INT", {"default": 480, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}),
+                             "length": ("INT", {"default": 81, "min": 1, "max": nodes.MAX_RESOLUTION, "step": 4}),
+                             "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
+                },
+                "optional": {"images": ("IMAGE", ),
+                }}
+
+    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "CONDITIONING", "LATENT")
+    RETURN_NAMES = ("positive", "negative_text", "negative_img_text", "latent")
+    FUNCTION = "encode"
+
+    CATEGORY = "conditioning/video_models"
+
+    def encode(self, positive, negative, vae, width, height, length, batch_size, images):
+        latent = torch.zeros([batch_size, 16, ((length - 1) // 4) + 1, height // 8, width // 8], device=comfy.model_management.intermediate_device())
+        cond2 = negative
+        if images is not None:
+            images = comfy.utils.common_upscale(images[:length].movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1)
+            latent_images = []
+            for i in images:
+                latent_images += [vae.encode(i.unsqueeze(0)[:, :, :, :3])]
+            concat_latent_image = torch.cat(latent_images, dim=2)
+
+            positive = node_helpers.conditioning_set_values(positive, {"time_dim_concat": concat_latent_image})
+            cond2 = node_helpers.conditioning_set_values(negative, {"time_dim_concat": concat_latent_image})
+            negative = node_helpers.conditioning_set_values(negative, {"time_dim_concat": comfy.latent_formats.Wan21().process_out(torch.zeros_like(concat_latent_image))})
+
+        out_latent = {}
+        out_latent["samples"] = latent
+        return (positive, cond2, negative, out_latent)
+
 NODE_CLASS_MAPPINGS = {
     "WanImageToVideo": WanImageToVideo,
     "WanFunControlToVideo": WanFunControlToVideo,
@@ -353,4 +391,5 @@ NODE_CLASS_MAPPINGS = {
     "WanVaceToVideo": WanVaceToVideo,
     "TrimVideoLatent": TrimVideoLatent,
     "WanCameraImageToVideo": WanCameraImageToVideo,
+    "WanPhantomSubjectToVideo": WanPhantomSubjectToVideo,
 }
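For reference, a standalone sketch of the latent shape the new node allocates at its default settings (plain arithmetic only, not repository code):

    width, height, length, batch_size = 832, 480, 81, 1   # node defaults

    t = ((length - 1) // 4) + 1   # 81 video frames -> 21 latent frames
    h = height // 8               # 480 -> 60
    w = width // 8                # 832 -> 104

    print((batch_size, 16, t, h, w))  # (1, 16, 21, 60, 104)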