mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-01-25 13:50:15 +08:00
Compare commits
4 Commits
c0517eb85e
...
bb52415057
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
bb52415057 | ||
|
|
acbf08cd60 | ||
|
|
9ee905bc47 | ||
|
|
d56d374c96 |
@ -1248,22 +1248,23 @@ class WAN21_HuMo(WAN21):
|
||||
if audio_embed is not None:
|
||||
out['audio_embed'] = comfy.conds.CONDRegular(audio_embed)
|
||||
|
||||
if "c_concat" not in out: # 1.7B model
|
||||
reference_latents = kwargs.get("reference_latents", None)
|
||||
if reference_latents is not None:
|
||||
|
||||
if "c_concat" not in out and reference_latents is not None and reference_latents[0].shape[1] == 16: # 1.7B model
|
||||
out['reference_latent'] = comfy.conds.CONDRegular(self.process_latent_in(reference_latents[-1]))
|
||||
else:
|
||||
noise_shape = list(noise.shape)
|
||||
noise_shape[1] += 4
|
||||
concat_latent = torch.zeros(noise_shape, device=noise.device, dtype=noise.dtype)
|
||||
zero_vae_values_first = torch.tensor([0.8660, -0.4326, -0.0017, -0.4884, -0.5283, 0.9207, -0.9896, 0.4433, -0.5543, -0.0113, 0.5753, -0.6000, -0.8346, -0.3497, -0.1926, -0.6938]).view(1, 16, 1, 1, 1)
|
||||
zero_vae_values_second = torch.tensor([1.0869, -1.2370, 0.0206, -0.4357, -0.6411, 2.0307, -1.5972, 1.2659, -0.8595, -0.4654, 0.9638, -1.6330, -1.4310, -0.1098, -0.3856, -1.4583]).view(1, 16, 1, 1, 1)
|
||||
zero_vae_values = torch.tensor([0.8642, -1.8583, 0.1577, 0.1350, -0.3641, 2.5863, -1.9670, 1.6065, -1.0475, -0.8678, 1.1734, -1.8138, -1.5933, -0.7721, -0.3289, -1.3745]).view(1, 16, 1, 1, 1)
|
||||
concat_latent[:, 4:] = zero_vae_values
|
||||
concat_latent[:, 4:, :1] = zero_vae_values_first
|
||||
concat_latent[:, 4:, 1:2] = zero_vae_values_second
|
||||
out['c_concat'] = comfy.conds.CONDNoiseShape(concat_latent)
|
||||
reference_latents = kwargs.get("reference_latents", None)
|
||||
else:
|
||||
concat_latent_image = kwargs.get("concat_latent_image", None)
|
||||
if concat_latent_image is None:
|
||||
noise_shape = list(noise.shape)
|
||||
noise_shape[1] += 4
|
||||
concat_latent = torch.zeros(noise_shape, device=noise.device, dtype=noise.dtype)
|
||||
zero_vae_values_first = torch.tensor([0.8660, -0.4326, -0.0017, -0.4884, -0.5283, 0.9207, -0.9896, 0.4433, -0.5543, -0.0113, 0.5753, -0.6000, -0.8346, -0.3497, -0.1926, -0.6938]).view(1, 16, 1, 1, 1)
|
||||
zero_vae_values_second = torch.tensor([1.0869, -1.2370, 0.0206, -0.4357, -0.6411, 2.0307, -1.5972, 1.2659, -0.8595, -0.4654, 0.9638, -1.6330, -1.4310, -0.1098, -0.3856, -1.4583]).view(1, 16, 1, 1, 1)
|
||||
zero_vae_values = torch.tensor([0.8642, -1.8583, 0.1577, 0.1350, -0.3641, 2.5863, -1.9670, 1.6065, -1.0475, -0.8678, 1.1734, -1.8138, -1.5933, -0.7721, -0.3289, -1.3745]).view(1, 16, 1, 1, 1)
|
||||
concat_latent[:, 4:] = zero_vae_values
|
||||
concat_latent[:, 4:, :1] = zero_vae_values_first
|
||||
concat_latent[:, 4:, 1:2] = zero_vae_values_second
|
||||
out['c_concat'] = comfy.conds.CONDNoiseShape(concat_latent)
|
||||
if reference_latents is not None:
|
||||
ref_latent = self.process_latent_in(reference_latents[-1])
|
||||
ref_latent_shape = list(ref_latent.shape)
|
||||
|
||||
@ -807,6 +807,7 @@ class OmniProTextToVideoNode(IO.ComfyNode):
|
||||
),
|
||||
IO.Combo.Input("aspect_ratio", options=["16:9", "9:16", "1:1"]),
|
||||
IO.Combo.Input("duration", options=[5, 10]),
|
||||
IO.Combo.Input("resolution", options=["1080p", "720p"], optional=True),
|
||||
],
|
||||
outputs=[
|
||||
IO.Video.Output(),
|
||||
@ -826,6 +827,7 @@ class OmniProTextToVideoNode(IO.ComfyNode):
|
||||
prompt: str,
|
||||
aspect_ratio: str,
|
||||
duration: int,
|
||||
resolution: str = "1080p",
|
||||
) -> IO.NodeOutput:
|
||||
validate_string(prompt, min_length=1, max_length=2500)
|
||||
response = await sync_op(
|
||||
@ -837,6 +839,7 @@ class OmniProTextToVideoNode(IO.ComfyNode):
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect_ratio,
|
||||
duration=str(duration),
|
||||
mode="pro" if resolution == "1080p" else "std",
|
||||
),
|
||||
)
|
||||
return await finish_omni_video_task(cls, response)
|
||||
@ -872,6 +875,7 @@ class OmniProFirstLastFrameNode(IO.ComfyNode):
|
||||
optional=True,
|
||||
tooltip="Up to 6 additional reference images.",
|
||||
),
|
||||
IO.Combo.Input("resolution", options=["1080p", "720p"], optional=True),
|
||||
],
|
||||
outputs=[
|
||||
IO.Video.Output(),
|
||||
@ -893,6 +897,7 @@ class OmniProFirstLastFrameNode(IO.ComfyNode):
|
||||
first_frame: Input.Image,
|
||||
end_frame: Input.Image | None = None,
|
||||
reference_images: Input.Image | None = None,
|
||||
resolution: str = "1080p",
|
||||
) -> IO.NodeOutput:
|
||||
prompt = normalize_omni_prompt_references(prompt)
|
||||
validate_string(prompt, min_length=1, max_length=2500)
|
||||
@ -936,6 +941,7 @@ class OmniProFirstLastFrameNode(IO.ComfyNode):
|
||||
prompt=prompt,
|
||||
duration=str(duration),
|
||||
image_list=image_list,
|
||||
mode="pro" if resolution == "1080p" else "std",
|
||||
),
|
||||
)
|
||||
return await finish_omni_video_task(cls, response)
|
||||
@ -964,6 +970,7 @@ class OmniProImageToVideoNode(IO.ComfyNode):
|
||||
"reference_images",
|
||||
tooltip="Up to 7 reference images.",
|
||||
),
|
||||
IO.Combo.Input("resolution", options=["1080p", "720p"], optional=True),
|
||||
],
|
||||
outputs=[
|
||||
IO.Video.Output(),
|
||||
@ -984,6 +991,7 @@ class OmniProImageToVideoNode(IO.ComfyNode):
|
||||
aspect_ratio: str,
|
||||
duration: int,
|
||||
reference_images: Input.Image,
|
||||
resolution: str = "1080p",
|
||||
) -> IO.NodeOutput:
|
||||
prompt = normalize_omni_prompt_references(prompt)
|
||||
validate_string(prompt, min_length=1, max_length=2500)
|
||||
@ -1005,6 +1013,7 @@ class OmniProImageToVideoNode(IO.ComfyNode):
|
||||
aspect_ratio=aspect_ratio,
|
||||
duration=str(duration),
|
||||
image_list=image_list,
|
||||
mode="pro" if resolution == "1080p" else "std",
|
||||
),
|
||||
)
|
||||
return await finish_omni_video_task(cls, response)
|
||||
@ -1036,6 +1045,7 @@ class OmniProVideoToVideoNode(IO.ComfyNode):
|
||||
tooltip="Up to 4 additional reference images.",
|
||||
optional=True,
|
||||
),
|
||||
IO.Combo.Input("resolution", options=["1080p", "720p"], optional=True),
|
||||
],
|
||||
outputs=[
|
||||
IO.Video.Output(),
|
||||
@ -1058,6 +1068,7 @@ class OmniProVideoToVideoNode(IO.ComfyNode):
|
||||
reference_video: Input.Video,
|
||||
keep_original_sound: bool,
|
||||
reference_images: Input.Image | None = None,
|
||||
resolution: str = "1080p",
|
||||
) -> IO.NodeOutput:
|
||||
prompt = normalize_omni_prompt_references(prompt)
|
||||
validate_string(prompt, min_length=1, max_length=2500)
|
||||
@ -1090,6 +1101,7 @@ class OmniProVideoToVideoNode(IO.ComfyNode):
|
||||
duration=str(duration),
|
||||
image_list=image_list if image_list else None,
|
||||
video_list=video_list,
|
||||
mode="pro" if resolution == "1080p" else "std",
|
||||
),
|
||||
)
|
||||
return await finish_omni_video_task(cls, response)
|
||||
@ -1119,6 +1131,7 @@ class OmniProEditVideoNode(IO.ComfyNode):
|
||||
tooltip="Up to 4 additional reference images.",
|
||||
optional=True,
|
||||
),
|
||||
IO.Combo.Input("resolution", options=["1080p", "720p"], optional=True),
|
||||
],
|
||||
outputs=[
|
||||
IO.Video.Output(),
|
||||
@ -1139,6 +1152,7 @@ class OmniProEditVideoNode(IO.ComfyNode):
|
||||
video: Input.Video,
|
||||
keep_original_sound: bool,
|
||||
reference_images: Input.Image | None = None,
|
||||
resolution: str = "1080p",
|
||||
) -> IO.NodeOutput:
|
||||
prompt = normalize_omni_prompt_references(prompt)
|
||||
validate_string(prompt, min_length=1, max_length=2500)
|
||||
@ -1171,6 +1185,7 @@ class OmniProEditVideoNode(IO.ComfyNode):
|
||||
duration=None,
|
||||
image_list=image_list if image_list else None,
|
||||
video_list=video_list,
|
||||
mode="pro" if resolution == "1080p" else "std",
|
||||
),
|
||||
)
|
||||
return await finish_omni_video_task(cls, response)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user