mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-01-26 14:20:27 +08:00
Merge branch 'comfyanonymous:master' into master
This commit is contained in:
commit
c6b0bf480f
@ -1110,9 +1110,10 @@ class WAN21(BaseModel):
|
|||||||
shape_image[1] = extra_channels
|
shape_image[1] = extra_channels
|
||||||
image = torch.zeros(shape_image, dtype=noise.dtype, layout=noise.layout, device=noise.device)
|
image = torch.zeros(shape_image, dtype=noise.dtype, layout=noise.layout, device=noise.device)
|
||||||
else:
|
else:
|
||||||
|
latent_dim = self.latent_format.latent_channels
|
||||||
image = utils.common_upscale(image.to(device), noise.shape[-1], noise.shape[-2], "bilinear", "center")
|
image = utils.common_upscale(image.to(device), noise.shape[-1], noise.shape[-2], "bilinear", "center")
|
||||||
for i in range(0, image.shape[1], 16):
|
for i in range(0, image.shape[1], latent_dim):
|
||||||
image[:, i: i + 16] = self.process_latent_in(image[:, i: i + 16])
|
image[:, i: i + latent_dim] = self.process_latent_in(image[:, i: i + latent_dim])
|
||||||
image = utils.resize_to_batch_size(image, noise.shape[0])
|
image = utils.resize_to_batch_size(image, noise.shape[0])
|
||||||
|
|
||||||
if extra_channels != image.shape[1] + 4:
|
if extra_channels != image.shape[1] + 4:
|
||||||
@ -1245,18 +1246,14 @@ class WAN22_S2V(WAN21):
|
|||||||
out['reference_motion'] = reference_motion.shape
|
out['reference_motion'] = reference_motion.shape
|
||||||
return out
|
return out
|
||||||
|
|
||||||
class WAN22(BaseModel):
|
class WAN22(WAN21):
|
||||||
def __init__(self, model_config, model_type=ModelType.FLOW, image_to_video=False, device=None):
|
def __init__(self, model_config, model_type=ModelType.FLOW, image_to_video=False, device=None):
|
||||||
super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.wan.model.WanModel)
|
super(WAN21, self).__init__(model_config, model_type, device=device, unet_model=comfy.ldm.wan.model.WanModel)
|
||||||
self.image_to_video = image_to_video
|
self.image_to_video = image_to_video
|
||||||
|
|
||||||
def extra_conds(self, **kwargs):
|
def extra_conds(self, **kwargs):
|
||||||
out = super().extra_conds(**kwargs)
|
out = super().extra_conds(**kwargs)
|
||||||
cross_attn = kwargs.get("cross_attn", None)
|
denoise_mask = kwargs.get("denoise_mask", None)
|
||||||
if cross_attn is not None:
|
|
||||||
out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)
|
|
||||||
|
|
||||||
denoise_mask = kwargs.get("concat_mask", kwargs.get("denoise_mask", None))
|
|
||||||
if denoise_mask is not None:
|
if denoise_mask is not None:
|
||||||
out["denoise_mask"] = comfy.conds.CONDRegular(denoise_mask)
|
out["denoise_mask"] = comfy.conds.CONDRegular(denoise_mask)
|
||||||
return out
|
return out
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
import comfy.utils
|
import comfy.utils
|
||||||
import comfy_extras.nodes_post_processing
|
import comfy_extras.nodes_post_processing
|
||||||
import torch
|
import torch
|
||||||
|
import nodes
|
||||||
|
|
||||||
|
|
||||||
def reshape_latent_to(target_shape, latent, repeat_batch=True):
|
def reshape_latent_to(target_shape, latent, repeat_batch=True):
|
||||||
@ -137,6 +138,41 @@ class LatentConcat:
|
|||||||
samples_out["samples"] = torch.cat(c, dim=dim)
|
samples_out["samples"] = torch.cat(c, dim=dim)
|
||||||
return (samples_out,)
|
return (samples_out,)
|
||||||
|
|
||||||
|
class LatentCut:
    """Node that cuts a contiguous slice out of a latent along one axis.

    The axis is selected by name and resolved relative to the END of the
    tensor's shape: "x" is the last axis, "y" the second to last and "t" the
    third to last.
    """

    # Axis name -> distance from the end of the tensor's shape. Order matters:
    # it mirrors the original x / y / t precedence of the lookup in `op`.
    _AXIS_FROM_END = (("x", 1), ("y", 2), ("t", 3))

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {"samples": ("LATENT",),
                             "dim": (["x", "y", "t"], ),
                             "index": ("INT", {"default": 0, "min": -nodes.MAX_RESOLUTION, "max": nodes.MAX_RESOLUTION, "step": 1}),
                             "amount": ("INT", {"default": 1, "min": 1, "max": nodes.MAX_RESOLUTION, "step": 1})}}

    RETURN_TYPES = ("LATENT",)
    FUNCTION = "op"

    CATEGORY = "latent/advanced"

    def op(self, samples, dim, index, amount):
        """Return a copy of `samples` whose "samples" tensor is narrowed.

        Args:
            samples: dict holding the latent tensor under the "samples" key.
                The dict is shallow-copied; every other key is carried over
                unchanged.
            dim: axis name; must contain "x", "y" or "t".
            index: start position of the cut. Non-negative values count from
                the start of the axis, negative values from its end. Values
                outside the axis are clamped to the nearest valid position.
            amount: requested length of the cut; reduced when the cut would
                otherwise run past the end of the axis.

        Returns:
            A 1-tuple containing the new latent dict.

        Raises:
            ValueError: if `dim` names none of the supported axes.
        """
        samples_out = samples.copy()

        s1 = samples["samples"]

        # Resolve the axis name to a concrete axis index. Substring matching
        # with x > y > t precedence is kept from the original implementation.
        from_end = next((offset for name, offset in self._AXIS_FROM_END if name in dim), None)
        if from_end is None:
            # Previously the string fell through to torch.narrow and failed
            # there with an unrelated-looking error.
            raise ValueError("Unknown dim {!r}, expected one of 'x', 'y', 't'".format(dim))
        # NOTE(review): for a 4-dimensional tensor "t" resolves to axis 1 --
        # confirm that this is intended for non-video latents.
        axis = s1.ndim - from_end

        size = s1.shape[axis]
        if index >= 0:
            # Clamp the start to the last valid position and keep the cut
            # inside the axis.
            index = min(index, size - 1)
            amount = min(size - index, amount)
        else:
            # Negative start counts from the end; at most `-index` elements
            # remain between that position and the end of the axis.
            index = max(index, -size)
            amount = min(-index, amount)

        samples_out["samples"] = torch.narrow(s1, axis, index, amount)
        return (samples_out,)
|
||||||
|
|
||||||
class LatentBatch:
|
class LatentBatch:
|
||||||
@classmethod
|
@classmethod
|
||||||
def INPUT_TYPES(s):
|
def INPUT_TYPES(s):
|
||||||
@ -312,6 +348,7 @@ NODE_CLASS_MAPPINGS = {
|
|||||||
"LatentMultiply": LatentMultiply,
|
"LatentMultiply": LatentMultiply,
|
||||||
"LatentInterpolate": LatentInterpolate,
|
"LatentInterpolate": LatentInterpolate,
|
||||||
"LatentConcat": LatentConcat,
|
"LatentConcat": LatentConcat,
|
||||||
|
"LatentCut": LatentCut,
|
||||||
"LatentBatch": LatentBatch,
|
"LatentBatch": LatentBatch,
|
||||||
"LatentBatchSeedBehavior": LatentBatchSeedBehavior,
|
"LatentBatchSeedBehavior": LatentBatchSeedBehavior,
|
||||||
"LatentApplyOperation": LatentApplyOperation,
|
"LatentApplyOperation": LatentApplyOperation,
|
||||||
|
|||||||
@ -139,16 +139,21 @@ class Wan22FunControlToVideo(io.ComfyNode):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def execute(cls, positive, negative, vae, width, height, length, batch_size, ref_image=None, start_image=None, control_video=None) -> io.NodeOutput:
|
def execute(cls, positive, negative, vae, width, height, length, batch_size, ref_image=None, start_image=None, control_video=None) -> io.NodeOutput:
|
||||||
latent = torch.zeros([batch_size, 16, ((length - 1) // 4) + 1, height // 8, width // 8], device=comfy.model_management.intermediate_device())
|
spacial_scale = vae.spacial_compression_encode()
|
||||||
concat_latent = torch.zeros([batch_size, 16, ((length - 1) // 4) + 1, height // 8, width // 8], device=comfy.model_management.intermediate_device())
|
latent_channels = vae.latent_channels
|
||||||
concat_latent = comfy.latent_formats.Wan21().process_out(concat_latent)
|
latent = torch.zeros([batch_size, latent_channels, ((length - 1) // 4) + 1, height // spacial_scale, width // spacial_scale], device=comfy.model_management.intermediate_device())
|
||||||
|
concat_latent = torch.zeros([batch_size, latent_channels, ((length - 1) // 4) + 1, height // spacial_scale, width // spacial_scale], device=comfy.model_management.intermediate_device())
|
||||||
|
if latent_channels == 48:
|
||||||
|
concat_latent = comfy.latent_formats.Wan22().process_out(concat_latent)
|
||||||
|
else:
|
||||||
|
concat_latent = comfy.latent_formats.Wan21().process_out(concat_latent)
|
||||||
concat_latent = concat_latent.repeat(1, 2, 1, 1, 1)
|
concat_latent = concat_latent.repeat(1, 2, 1, 1, 1)
|
||||||
mask = torch.ones((1, 1, latent.shape[2] * 4, latent.shape[-2], latent.shape[-1]))
|
mask = torch.ones((1, 1, latent.shape[2] * 4, latent.shape[-2], latent.shape[-1]))
|
||||||
|
|
||||||
if start_image is not None:
|
if start_image is not None:
|
||||||
start_image = comfy.utils.common_upscale(start_image[:length].movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1)
|
start_image = comfy.utils.common_upscale(start_image[:length].movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1)
|
||||||
concat_latent_image = vae.encode(start_image[:, :, :, :3])
|
concat_latent_image = vae.encode(start_image[:, :, :, :3])
|
||||||
concat_latent[:,16:,:concat_latent_image.shape[2]] = concat_latent_image[:,:,:concat_latent.shape[2]]
|
concat_latent[:,latent_channels:,:concat_latent_image.shape[2]] = concat_latent_image[:,:,:concat_latent.shape[2]]
|
||||||
mask[:, :, :start_image.shape[0] + 3] = 0.0
|
mask[:, :, :start_image.shape[0] + 3] = 0.0
|
||||||
|
|
||||||
ref_latent = None
|
ref_latent = None
|
||||||
@ -159,11 +164,11 @@ class Wan22FunControlToVideo(io.ComfyNode):
|
|||||||
if control_video is not None:
|
if control_video is not None:
|
||||||
control_video = comfy.utils.common_upscale(control_video[:length].movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1)
|
control_video = comfy.utils.common_upscale(control_video[:length].movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1)
|
||||||
concat_latent_image = vae.encode(control_video[:, :, :, :3])
|
concat_latent_image = vae.encode(control_video[:, :, :, :3])
|
||||||
concat_latent[:,:16,:concat_latent_image.shape[2]] = concat_latent_image[:,:,:concat_latent.shape[2]]
|
concat_latent[:,:latent_channels,:concat_latent_image.shape[2]] = concat_latent_image[:,:,:concat_latent.shape[2]]
|
||||||
|
|
||||||
mask = mask.view(1, mask.shape[2] // 4, 4, mask.shape[3], mask.shape[4]).transpose(1, 2)
|
mask = mask.view(1, mask.shape[2] // 4, 4, mask.shape[3], mask.shape[4]).transpose(1, 2)
|
||||||
positive = node_helpers.conditioning_set_values(positive, {"concat_latent_image": concat_latent, "concat_mask": mask, "concat_mask_index": 16})
|
positive = node_helpers.conditioning_set_values(positive, {"concat_latent_image": concat_latent, "concat_mask": mask, "concat_mask_index": latent_channels})
|
||||||
negative = node_helpers.conditioning_set_values(negative, {"concat_latent_image": concat_latent, "concat_mask": mask, "concat_mask_index": 16})
|
negative = node_helpers.conditioning_set_values(negative, {"concat_latent_image": concat_latent, "concat_mask": mask, "concat_mask_index": latent_channels})
|
||||||
|
|
||||||
if ref_latent is not None:
|
if ref_latent is not None:
|
||||||
positive = node_helpers.conditioning_set_values(positive, {"reference_latents": [ref_latent]}, append=True)
|
positive = node_helpers.conditioning_set_values(positive, {"reference_latents": [ref_latent]}, append=True)
|
||||||
@ -201,7 +206,8 @@ class WanFirstLastFrameToVideo(io.ComfyNode):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def execute(cls, positive, negative, vae, width, height, length, batch_size, start_image=None, end_image=None, clip_vision_start_image=None, clip_vision_end_image=None) -> io.NodeOutput:
|
def execute(cls, positive, negative, vae, width, height, length, batch_size, start_image=None, end_image=None, clip_vision_start_image=None, clip_vision_end_image=None) -> io.NodeOutput:
|
||||||
latent = torch.zeros([batch_size, 16, ((length - 1) // 4) + 1, height // 8, width // 8], device=comfy.model_management.intermediate_device())
|
spacial_scale = vae.spacial_compression_encode()
|
||||||
|
latent = torch.zeros([batch_size, vae.latent_channels, ((length - 1) // 4) + 1, height // spacial_scale, width // spacial_scale], device=comfy.model_management.intermediate_device())
|
||||||
if start_image is not None:
|
if start_image is not None:
|
||||||
start_image = comfy.utils.common_upscale(start_image[:length].movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1)
|
start_image = comfy.utils.common_upscale(start_image[:length].movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1)
|
||||||
if end_image is not None:
|
if end_image is not None:
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user