mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2025-12-20 19:42:59 +08:00
Merge branch 'master' into dr-support-pip-cm
This commit is contained in:
commit
4b1aac74bb
@ -27,6 +27,7 @@ class Llama2Config:
|
|||||||
rms_norm_add = False
|
rms_norm_add = False
|
||||||
mlp_activation = "silu"
|
mlp_activation = "silu"
|
||||||
qkv_bias = False
|
qkv_bias = False
|
||||||
|
rope_dims = None
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Qwen25_3BConfig:
|
class Qwen25_3BConfig:
|
||||||
@ -44,6 +45,7 @@ class Qwen25_3BConfig:
|
|||||||
rms_norm_add = False
|
rms_norm_add = False
|
||||||
mlp_activation = "silu"
|
mlp_activation = "silu"
|
||||||
qkv_bias = True
|
qkv_bias = True
|
||||||
|
rope_dims = None
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Qwen25_7BVLI_Config:
|
class Qwen25_7BVLI_Config:
|
||||||
@ -61,6 +63,7 @@ class Qwen25_7BVLI_Config:
|
|||||||
rms_norm_add = False
|
rms_norm_add = False
|
||||||
mlp_activation = "silu"
|
mlp_activation = "silu"
|
||||||
qkv_bias = True
|
qkv_bias = True
|
||||||
|
rope_dims = [16, 24, 24]
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Gemma2_2B_Config:
|
class Gemma2_2B_Config:
|
||||||
@ -78,6 +81,7 @@ class Gemma2_2B_Config:
|
|||||||
rms_norm_add = True
|
rms_norm_add = True
|
||||||
mlp_activation = "gelu_pytorch_tanh"
|
mlp_activation = "gelu_pytorch_tanh"
|
||||||
qkv_bias = False
|
qkv_bias = False
|
||||||
|
rope_dims = None
|
||||||
|
|
||||||
class RMSNorm(nn.Module):
|
class RMSNorm(nn.Module):
|
||||||
def __init__(self, dim: int, eps: float = 1e-5, add=False, device=None, dtype=None):
|
def __init__(self, dim: int, eps: float = 1e-5, add=False, device=None, dtype=None):
|
||||||
@ -102,7 +106,7 @@ def rotate_half(x):
|
|||||||
return torch.cat((-x2, x1), dim=-1)
|
return torch.cat((-x2, x1), dim=-1)
|
||||||
|
|
||||||
|
|
||||||
def precompute_freqs_cis(head_dim, position_ids, theta, device=None):
|
def precompute_freqs_cis(head_dim, position_ids, theta, rope_dims=None, device=None):
|
||||||
theta_numerator = torch.arange(0, head_dim, 2, device=device).float()
|
theta_numerator = torch.arange(0, head_dim, 2, device=device).float()
|
||||||
inv_freq = 1.0 / (theta ** (theta_numerator / head_dim))
|
inv_freq = 1.0 / (theta ** (theta_numerator / head_dim))
|
||||||
|
|
||||||
@ -112,12 +116,20 @@ def precompute_freqs_cis(head_dim, position_ids, theta, device=None):
|
|||||||
emb = torch.cat((freqs, freqs), dim=-1)
|
emb = torch.cat((freqs, freqs), dim=-1)
|
||||||
cos = emb.cos()
|
cos = emb.cos()
|
||||||
sin = emb.sin()
|
sin = emb.sin()
|
||||||
|
if rope_dims is not None and position_ids.shape[0] > 1:
|
||||||
|
mrope_section = rope_dims * 2
|
||||||
|
cos = torch.cat([m[i % 3] for i, m in enumerate(cos.split(mrope_section, dim=-1))], dim=-1).unsqueeze(0)
|
||||||
|
sin = torch.cat([m[i % 3] for i, m in enumerate(sin.split(mrope_section, dim=-1))], dim=-1).unsqueeze(0)
|
||||||
|
else:
|
||||||
|
cos = cos.unsqueeze(1)
|
||||||
|
sin = sin.unsqueeze(1)
|
||||||
|
|
||||||
return (cos, sin)
|
return (cos, sin)
|
||||||
|
|
||||||
|
|
||||||
def apply_rope(xq, xk, freqs_cis):
|
def apply_rope(xq, xk, freqs_cis):
|
||||||
cos = freqs_cis[0].unsqueeze(1)
|
cos = freqs_cis[0]
|
||||||
sin = freqs_cis[1].unsqueeze(1)
|
sin = freqs_cis[1]
|
||||||
q_embed = (xq * cos) + (rotate_half(xq) * sin)
|
q_embed = (xq * cos) + (rotate_half(xq) * sin)
|
||||||
k_embed = (xk * cos) + (rotate_half(xk) * sin)
|
k_embed = (xk * cos) + (rotate_half(xk) * sin)
|
||||||
return q_embed, k_embed
|
return q_embed, k_embed
|
||||||
@ -292,6 +304,7 @@ class Llama2_(nn.Module):
|
|||||||
freqs_cis = precompute_freqs_cis(self.config.head_dim,
|
freqs_cis = precompute_freqs_cis(self.config.head_dim,
|
||||||
position_ids,
|
position_ids,
|
||||||
self.config.rope_theta,
|
self.config.rope_theta,
|
||||||
|
self.config.rope_dims,
|
||||||
device=x.device)
|
device=x.device)
|
||||||
|
|
||||||
mask = None
|
mask = None
|
||||||
|
|||||||
16
comfy_api_nodes/apis/__init__.py
generated
16
comfy_api_nodes/apis/__init__.py
generated
@ -1315,6 +1315,7 @@ class KlingTaskStatus(str, Enum):
|
|||||||
class KlingTextToVideoModelName(str, Enum):
|
class KlingTextToVideoModelName(str, Enum):
|
||||||
kling_v1 = 'kling-v1'
|
kling_v1 = 'kling-v1'
|
||||||
kling_v1_6 = 'kling-v1-6'
|
kling_v1_6 = 'kling-v1-6'
|
||||||
|
kling_v2_1_master = 'kling-v2-1-master'
|
||||||
|
|
||||||
|
|
||||||
class KlingVideoGenAspectRatio(str, Enum):
|
class KlingVideoGenAspectRatio(str, Enum):
|
||||||
@ -1347,6 +1348,8 @@ class KlingVideoGenModelName(str, Enum):
|
|||||||
kling_v1_5 = 'kling-v1-5'
|
kling_v1_5 = 'kling-v1-5'
|
||||||
kling_v1_6 = 'kling-v1-6'
|
kling_v1_6 = 'kling-v1-6'
|
||||||
kling_v2_master = 'kling-v2-master'
|
kling_v2_master = 'kling-v2-master'
|
||||||
|
kling_v2_1 = 'kling-v2-1'
|
||||||
|
kling_v2_1_master = 'kling-v2-1-master'
|
||||||
|
|
||||||
|
|
||||||
class KlingVideoResult(BaseModel):
|
class KlingVideoResult(BaseModel):
|
||||||
@ -1620,13 +1623,14 @@ class MinimaxTaskResultResponse(BaseModel):
|
|||||||
task_id: str = Field(..., description='The task ID being queried.')
|
task_id: str = Field(..., description='The task ID being queried.')
|
||||||
|
|
||||||
|
|
||||||
class Model(str, Enum):
|
class MiniMaxModel(str, Enum):
|
||||||
T2V_01_Director = 'T2V-01-Director'
|
T2V_01_Director = 'T2V-01-Director'
|
||||||
I2V_01_Director = 'I2V-01-Director'
|
I2V_01_Director = 'I2V-01-Director'
|
||||||
S2V_01 = 'S2V-01'
|
S2V_01 = 'S2V-01'
|
||||||
I2V_01 = 'I2V-01'
|
I2V_01 = 'I2V-01'
|
||||||
I2V_01_live = 'I2V-01-live'
|
I2V_01_live = 'I2V-01-live'
|
||||||
T2V_01 = 'T2V-01'
|
T2V_01 = 'T2V-01'
|
||||||
|
Hailuo_02 = 'MiniMax-Hailuo-02'
|
||||||
|
|
||||||
|
|
||||||
class SubjectReferenceItem(BaseModel):
|
class SubjectReferenceItem(BaseModel):
|
||||||
@ -1648,7 +1652,7 @@ class MinimaxVideoGenerationRequest(BaseModel):
|
|||||||
None,
|
None,
|
||||||
description='URL or base64 encoding of the first frame image. Required when model is I2V-01, I2V-01-Director, or I2V-01-live.',
|
description='URL or base64 encoding of the first frame image. Required when model is I2V-01, I2V-01-Director, or I2V-01-live.',
|
||||||
)
|
)
|
||||||
model: Model = Field(
|
model: MiniMaxModel = Field(
|
||||||
...,
|
...,
|
||||||
description='Required. ID of model. Options: T2V-01-Director, I2V-01-Director, S2V-01, I2V-01, I2V-01-live, T2V-01',
|
description='Required. ID of model. Options: T2V-01-Director, I2V-01-Director, S2V-01, I2V-01, I2V-01-live, T2V-01',
|
||||||
)
|
)
|
||||||
@ -1665,6 +1669,14 @@ class MinimaxVideoGenerationRequest(BaseModel):
|
|||||||
None,
|
None,
|
||||||
description='Only available when model is S2V-01. The model will generate a video based on the subject uploaded through this parameter.',
|
description='Only available when model is S2V-01. The model will generate a video based on the subject uploaded through this parameter.',
|
||||||
)
|
)
|
||||||
|
duration: Optional[int] = Field(
|
||||||
|
None,
|
||||||
|
description="The length of the output video in seconds."
|
||||||
|
)
|
||||||
|
resolution: Optional[str] = Field(
|
||||||
|
None,
|
||||||
|
description="The dimensions of the video display. 1080p corresponds to 1920 x 1080 pixels, 768p corresponds to 1366 x 768 pixels."
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class MinimaxVideoGenerationResponse(BaseModel):
|
class MinimaxVideoGenerationResponse(BaseModel):
|
||||||
|
|||||||
@ -46,6 +46,8 @@ class GeminiModel(str, Enum):
|
|||||||
|
|
||||||
gemini_2_5_pro_preview_05_06 = "gemini-2.5-pro-preview-05-06"
|
gemini_2_5_pro_preview_05_06 = "gemini-2.5-pro-preview-05-06"
|
||||||
gemini_2_5_flash_preview_04_17 = "gemini-2.5-flash-preview-04-17"
|
gemini_2_5_flash_preview_04_17 = "gemini-2.5-flash-preview-04-17"
|
||||||
|
gemini_2_5_pro = "gemini-2.5-pro"
|
||||||
|
gemini_2_5_flash = "gemini-2.5-flash"
|
||||||
|
|
||||||
|
|
||||||
def get_gemini_endpoint(
|
def get_gemini_endpoint(
|
||||||
@ -97,7 +99,7 @@ class GeminiNode(ComfyNodeABC):
|
|||||||
{
|
{
|
||||||
"tooltip": "The Gemini model to use for generating responses.",
|
"tooltip": "The Gemini model to use for generating responses.",
|
||||||
"options": [model.value for model in GeminiModel],
|
"options": [model.value for model in GeminiModel],
|
||||||
"default": GeminiModel.gemini_2_5_pro_preview_05_06.value,
|
"default": GeminiModel.gemini_2_5_pro.value,
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
"seed": (
|
"seed": (
|
||||||
|
|||||||
@ -421,6 +421,8 @@ class KlingTextToVideoNode(KlingNodeBase):
|
|||||||
"pro mode / 10s duration / kling-v2-master": ("pro", "10", "kling-v2-master"),
|
"pro mode / 10s duration / kling-v2-master": ("pro", "10", "kling-v2-master"),
|
||||||
"standard mode / 5s duration / kling-v2-master": ("std", "5", "kling-v2-master"),
|
"standard mode / 5s duration / kling-v2-master": ("std", "5", "kling-v2-master"),
|
||||||
"standard mode / 10s duration / kling-v2-master": ("std", "10", "kling-v2-master"),
|
"standard mode / 10s duration / kling-v2-master": ("std", "10", "kling-v2-master"),
|
||||||
|
"pro mode / 5s duration / kling-v2-1-master": ("pro", "5", "kling-v2-1-master"),
|
||||||
|
"pro mode / 10s duration / kling-v2-1-master": ("pro", "10", "kling-v2-1-master"),
|
||||||
}
|
}
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|||||||
@ -1,3 +1,4 @@
|
|||||||
|
from inspect import cleandoc
|
||||||
from typing import Union
|
from typing import Union
|
||||||
import logging
|
import logging
|
||||||
import torch
|
import torch
|
||||||
@ -10,7 +11,7 @@ from comfy_api_nodes.apis import (
|
|||||||
MinimaxFileRetrieveResponse,
|
MinimaxFileRetrieveResponse,
|
||||||
MinimaxTaskResultResponse,
|
MinimaxTaskResultResponse,
|
||||||
SubjectReferenceItem,
|
SubjectReferenceItem,
|
||||||
Model
|
MiniMaxModel
|
||||||
)
|
)
|
||||||
from comfy_api_nodes.apis.client import (
|
from comfy_api_nodes.apis.client import (
|
||||||
ApiEndpoint,
|
ApiEndpoint,
|
||||||
@ -84,7 +85,6 @@ class MinimaxTextToVideoNode:
|
|||||||
FUNCTION = "generate_video"
|
FUNCTION = "generate_video"
|
||||||
CATEGORY = "api node/video/MiniMax"
|
CATEGORY = "api node/video/MiniMax"
|
||||||
API_NODE = True
|
API_NODE = True
|
||||||
OUTPUT_NODE = True
|
|
||||||
|
|
||||||
async def generate_video(
|
async def generate_video(
|
||||||
self,
|
self,
|
||||||
@ -121,7 +121,7 @@ class MinimaxTextToVideoNode:
|
|||||||
response_model=MinimaxVideoGenerationResponse,
|
response_model=MinimaxVideoGenerationResponse,
|
||||||
),
|
),
|
||||||
request=MinimaxVideoGenerationRequest(
|
request=MinimaxVideoGenerationRequest(
|
||||||
model=Model(model),
|
model=MiniMaxModel(model),
|
||||||
prompt=prompt_text,
|
prompt=prompt_text,
|
||||||
callback_url=None,
|
callback_url=None,
|
||||||
first_frame_image=image_url,
|
first_frame_image=image_url,
|
||||||
@ -251,7 +251,6 @@ class MinimaxImageToVideoNode(MinimaxTextToVideoNode):
|
|||||||
FUNCTION = "generate_video"
|
FUNCTION = "generate_video"
|
||||||
CATEGORY = "api node/video/MiniMax"
|
CATEGORY = "api node/video/MiniMax"
|
||||||
API_NODE = True
|
API_NODE = True
|
||||||
OUTPUT_NODE = True
|
|
||||||
|
|
||||||
|
|
||||||
class MinimaxSubjectToVideoNode(MinimaxTextToVideoNode):
|
class MinimaxSubjectToVideoNode(MinimaxTextToVideoNode):
|
||||||
@ -313,7 +312,181 @@ class MinimaxSubjectToVideoNode(MinimaxTextToVideoNode):
|
|||||||
FUNCTION = "generate_video"
|
FUNCTION = "generate_video"
|
||||||
CATEGORY = "api node/video/MiniMax"
|
CATEGORY = "api node/video/MiniMax"
|
||||||
API_NODE = True
|
API_NODE = True
|
||||||
OUTPUT_NODE = True
|
|
||||||
|
|
||||||
|
class MinimaxHailuoVideoNode:
    """Generates videos from prompt, with optional start frame using the new MiniMax Hailuo-02 model."""

    # NOTE: the class docstring above is runtime text; it is reused verbatim
    # as DESCRIPTION below, so it must not be reworded casually.

    @classmethod
    def INPUT_TYPES(s):
        # Declares the node's inputs: one required prompt, several optional
        # generation settings, and hidden auth/identity values.
        return {
            "required": {
                "prompt_text": (
                    "STRING",
                    {
                        "multiline": True,
                        "default": "",
                        "tooltip": "Text prompt to guide the video generation.",
                    },
                ),
            },
            "optional": {
                "seed": (
                    IO.INT,
                    {
                        "default": 0,
                        "min": 0,
                        "max": 0xFFFFFFFFFFFFFFFF,
                        "control_after_generate": True,
                        "tooltip": "The random seed used for creating the noise.",
                    },
                ),
                "first_frame_image": (
                    IO.IMAGE,
                    {
                        "tooltip": "Optional image to use as the first frame to generate a video."
                    },
                ),
                "prompt_optimizer": (
                    IO.BOOLEAN,
                    {
                        "tooltip": "Optimize prompt to improve generation quality when needed.",
                        "default": True,
                    },
                ),
                "duration": (
                    IO.COMBO,
                    {
                        "tooltip": "The length of the output video in seconds.",
                        "default": 6,
                        "options": [6, 10],
                    },
                ),
                "resolution": (
                    IO.COMBO,
                    {
                        "tooltip": "The dimensions of the video display. "
                                   "1080p corresponds to 1920 x 1080 pixels, 768p corresponds to 1366 x 768 pixels.",
                        "default": "768P",
                        "options": ["768P", "1080P"],
                    },
                ),
            },
            "hidden": {
                "auth_token": "AUTH_TOKEN_COMFY_ORG",
                "comfy_api_key": "API_KEY_COMFY_ORG",
                "unique_id": "UNIQUE_ID",
            },
        }

    RETURN_TYPES = ("VIDEO",)
    DESCRIPTION = cleandoc(__doc__ or "")
    FUNCTION = "generate_video"
    CATEGORY = "api node/video/MiniMax"
    API_NODE = True

    async def generate_video(
        self,
        prompt_text,
        seed=0,
        first_frame_image: Union[torch.Tensor, None]=None,  # used for ImageToVideo
        prompt_optimizer=True,
        duration=6,
        resolution="768P",
        model="MiniMax-Hailuo-02",
        unique_id: Union[str, None]=None,
        **kwargs,
    ):
        """Submit a generation request, poll until it finishes, and download the video.

        Args:
            prompt_text: Text prompt; validated only when no first frame is given.
            seed: Accepted from the node inputs. NOTE(review): it is not
                forwarded in the request built here.
            first_frame_image: Optional image uploaded and used as the first frame.
            prompt_optimizer: Forwarded to the request unchanged.
            duration: Video length in seconds, forwarded to the request.
            resolution: Resolution label ("768P" or "1080P"), forwarded unchanged.
            model: Model identifier converted to ``MiniMaxModel``.
            unique_id: Node id used for progress reporting, when available.
            **kwargs: Auth values passed through as ``auth_kwargs``.

        Returns:
            A one-element tuple holding the downloaded video.

        Raises:
            Exception: On an unsupported duration/resolution combination, a
                missing task id, file id or download URL, or a failed download.
        """
        if first_frame_image is None:
            validate_string(prompt_text, field_name="prompt_text")

        # Normalise once so validation and the duration estimate treat the
        # label the same way; the value sent to the API stays untouched.
        resolution_key = resolution.upper()

        if model == "MiniMax-Hailuo-02" and resolution_key == "1080P" and duration != 6:
            raise Exception(
                "When model is MiniMax-Hailuo-02 and resolution is 1080P, duration is limited to 6 seconds."
            )

        # upload image, if passed in
        image_url = None
        if first_frame_image is not None:
            image_url = (await upload_images_to_comfyapi(first_frame_image, max_images=1, auth_kwargs=kwargs))[0]

        video_generate_operation = SynchronousOperation(
            endpoint=ApiEndpoint(
                path="/proxy/minimax/video_generation",
                method=HttpMethod.POST,
                request_model=MinimaxVideoGenerationRequest,
                response_model=MinimaxVideoGenerationResponse,
            ),
            request=MinimaxVideoGenerationRequest(
                model=MiniMaxModel(model),
                prompt=prompt_text,
                callback_url=None,
                first_frame_image=image_url,
                prompt_optimizer=prompt_optimizer,
                duration=duration,
                resolution=resolution,
            ),
            auth_kwargs=kwargs,
        )
        response = await video_generate_operation.execute()

        task_id = response.task_id
        if not task_id:
            raise Exception(f"MiniMax generation failed: {response.base_resp}")

        # Progress estimate only; the higher resolution is given a longer estimate.
        average_duration = 120 if resolution_key == "768P" else 240
        video_generate_operation = PollingOperation(
            poll_endpoint=ApiEndpoint(
                path="/proxy/minimax/query/video_generation",
                method=HttpMethod.GET,
                request_model=EmptyRequest,
                response_model=MinimaxTaskResultResponse,
                query_params={"task_id": task_id},
            ),
            completed_statuses=["Success"],
            failed_statuses=["Fail"],
            status_extractor=lambda x: x.status.value,
            estimated_duration=average_duration,
            node_id=unique_id,
            auth_kwargs=kwargs,
        )
        task_result = await video_generate_operation.execute()

        file_id = task_result.file_id
        if file_id is None:
            raise Exception("Request was not successful. Missing file ID.")
        file_retrieve_operation = SynchronousOperation(
            endpoint=ApiEndpoint(
                path="/proxy/minimax/files/retrieve",
                method=HttpMethod.GET,
                request_model=EmptyRequest,
                response_model=MinimaxFileRetrieveResponse,
                query_params={"file_id": int(file_id)},
            ),
            request=EmptyRequest(),
            auth_kwargs=kwargs,
        )
        file_result = await file_retrieve_operation.execute()

        file_url = file_result.file.download_url
        if file_url is None:
            raise Exception(
                f"No video was found in the response. Full response: {file_result.model_dump()}"
            )
        logging.info(f"Generated video URL: {file_url}")
        if unique_id:
            # Only mention the backup URL when one is actually present, so the
            # progress text never shows "Backup URL: None".
            backup_url = getattr(file_result.file, "backup_download_url", None)
            if backup_url:
                message = f"Result URL: {file_url}\nBackup URL: {backup_url}"
            else:
                message = f"Result URL: {file_url}"
            PromptServer.instance.send_progress_text(message, unique_id)

        video_io = await download_url_to_bytesio(file_url)
        if video_io is None:
            error_msg = f"Failed to download video from {file_url}"
            logging.error(error_msg)
            raise Exception(error_msg)
        return (VideoFromFile(video_io),)
|
||||||
|
|
||||||
|
|
||||||
# A dictionary that contains all nodes you want to export with their names
|
# A dictionary that contains all nodes you want to export with their names
|
||||||
@ -322,6 +495,7 @@ NODE_CLASS_MAPPINGS = {
|
|||||||
"MinimaxTextToVideoNode": MinimaxTextToVideoNode,
|
"MinimaxTextToVideoNode": MinimaxTextToVideoNode,
|
||||||
"MinimaxImageToVideoNode": MinimaxImageToVideoNode,
|
"MinimaxImageToVideoNode": MinimaxImageToVideoNode,
|
||||||
# "MinimaxSubjectToVideoNode": MinimaxSubjectToVideoNode,
|
# "MinimaxSubjectToVideoNode": MinimaxSubjectToVideoNode,
|
||||||
|
"MinimaxHailuoVideoNode": MinimaxHailuoVideoNode,
|
||||||
}
|
}
|
||||||
|
|
||||||
# A dictionary that contains the friendly/humanly readable titles for the nodes
|
# A dictionary that contains the friendly/humanly readable titles for the nodes
|
||||||
@ -329,4 +503,5 @@ NODE_DISPLAY_NAME_MAPPINGS = {
|
|||||||
"MinimaxTextToVideoNode": "MiniMax Text to Video",
|
"MinimaxTextToVideoNode": "MiniMax Text to Video",
|
||||||
"MinimaxImageToVideoNode": "MiniMax Image to Video",
|
"MinimaxImageToVideoNode": "MiniMax Image to Video",
|
||||||
"MinimaxSubjectToVideoNode": "MiniMax Subject to Video",
|
"MinimaxSubjectToVideoNode": "MiniMax Subject to Video",
|
||||||
|
"MinimaxHailuoVideoNode": "MiniMax Hailuo Video",
|
||||||
}
|
}
|
||||||
|
|||||||
@ -80,6 +80,9 @@ class SupportedOpenAIModel(str, Enum):
|
|||||||
gpt_4_1 = "gpt-4.1"
|
gpt_4_1 = "gpt-4.1"
|
||||||
gpt_4_1_mini = "gpt-4.1-mini"
|
gpt_4_1_mini = "gpt-4.1-mini"
|
||||||
gpt_4_1_nano = "gpt-4.1-nano"
|
gpt_4_1_nano = "gpt-4.1-nano"
|
||||||
|
gpt_5 = "gpt-5"
|
||||||
|
gpt_5_mini = "gpt-5-mini"
|
||||||
|
gpt_5_nano = "gpt-5-nano"
|
||||||
|
|
||||||
|
|
||||||
class OpenAIDalle2(ComfyNodeABC):
|
class OpenAIDalle2(ComfyNodeABC):
|
||||||
|
|||||||
622
comfy_api_nodes/nodes_vidu.py
Normal file
622
comfy_api_nodes/nodes_vidu.py
Normal file
@ -0,0 +1,622 @@
|
|||||||
|
import logging
|
||||||
|
from enum import Enum
|
||||||
|
from typing import Any, Callable, Optional, Literal, TypeVar
|
||||||
|
from typing_extensions import override
|
||||||
|
|
||||||
|
import torch
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from comfy_api.latest import ComfyExtension, io as comfy_io
|
||||||
|
from comfy_api_nodes.util.validation_utils import (
|
||||||
|
validate_aspect_ratio_closeness,
|
||||||
|
validate_image_dimensions,
|
||||||
|
validate_image_aspect_ratio_range,
|
||||||
|
get_number_of_images,
|
||||||
|
)
|
||||||
|
from comfy_api_nodes.apis.client import (
|
||||||
|
ApiEndpoint,
|
||||||
|
HttpMethod,
|
||||||
|
SynchronousOperation,
|
||||||
|
PollingOperation,
|
||||||
|
EmptyRequest,
|
||||||
|
)
|
||||||
|
from comfy_api_nodes.apinode_utils import download_url_to_video_output, upload_images_to_comfyapi
|
||||||
|
|
||||||
|
|
||||||
|
# Proxy routes used to create Vidu generation tasks.
VIDU_TEXT_TO_VIDEO = "/proxy/vidu/text2video"
VIDU_IMAGE_TO_VIDEO = "/proxy/vidu/img2video"
VIDU_REFERENCE_VIDEO = "/proxy/vidu/reference2video"
VIDU_START_END_VIDEO = "/proxy/vidu/start-end2video"
# Status route template; the "%s" placeholder is filled with the task id.
VIDU_GET_GENERATION_STATUS = "/proxy/vidu/tasks/%s/creations"

# Generic response type used when annotating the polling helper.
R = TypeVar("R")
|
||||||
|
|
||||||
|
class VideoModelName(str, Enum):
    """Model identifiers offered by the Vidu nodes (string-valued)."""

    vidu_q1 = "viduq1"
|
||||||
|
|
||||||
|
|
||||||
|
class AspectRatio(str, Enum):
    """Aspect-ratio labels selectable for the output video."""

    r_16_9 = "16:9"
    r_9_16 = "9:16"
    r_1_1 = "1:1"
|
||||||
|
|
||||||
|
|
||||||
|
class Resolution(str, Enum):
    """Resolution labels selectable for the output video."""

    r_1080p = "1080p"
|
||||||
|
|
||||||
|
|
||||||
|
class MovementAmplitude(str, Enum):
    """Movement-amplitude settings selectable for objects in the frame."""

    auto = "auto"
    small = "small"
    medium = "medium"
    large = "large"
|
||||||
|
|
||||||
|
|
||||||
|
class TaskCreationRequest(BaseModel):
    """Request body posted to the Vidu task-creation routes."""

    model: VideoModelName = VideoModelName.vidu_q1
    prompt: Optional[str] = Field(default=None, max_length=1500)
    # Only the literal value 5 (or None) is accepted for the duration.
    duration: Optional[Literal[5]] = 5
    seed: Optional[int] = Field(default=0, ge=0, le=2147483647)
    aspect_ratio: Optional[AspectRatio] = AspectRatio.r_16_9
    resolution: Optional[Resolution] = Resolution.r_1080p
    movement_amplitude: Optional[MovementAmplitude] = MovementAmplitude.auto
    images: Optional[list[str]] = Field(
        default=None,
        description="Base64 encoded string or image URL",
    )
|
||||||
|
|
||||||
|
|
||||||
|
class TaskStatus(str, Enum):
    """States a Vidu task can report."""

    created = "created"
    queueing = "queueing"
    processing = "processing"
    success = "success"
    failed = "failed"
|
||||||
|
|
||||||
|
|
||||||
|
class TaskCreationResponse(BaseModel):
    """Response returned when a Vidu task is created."""

    # The three fields below are required (no default).
    task_id: str
    state: TaskStatus
    created_at: str
    code: Optional[int] = Field(default=None, description="Error code")
|
||||||
|
|
||||||
|
|
||||||
|
class TaskResult(BaseModel):
    """One generated result ("creation") of a Vidu task; all fields are required."""

    id: str = Field(
        ...,
        description="Creation id",
    )
    url: str = Field(
        ...,
        description="The URL of the generated results, valid for one hour",
    )
    cover_url: str = Field(
        ...,
        description="The cover URL of the generated results, valid for one hour",
    )
|
||||||
|
|
||||||
|
|
||||||
|
class TaskStatusResponse(BaseModel):
    """Response of the Vidu task-status route."""

    state: TaskStatus
    err_code: Optional[str] = None
    creations: list[TaskResult] = Field(
        ...,
        description="Generated results",
    )
|
||||||
|
|
||||||
|
|
||||||
|
async def poll_until_finished(
    auth_kwargs: dict[str, str],
    api_endpoint: ApiEndpoint[Any, R],
    result_url_extractor: Optional[Callable[[R], str]] = None,
    estimated_duration: Optional[int] = None,
    node_id: Optional[str] = None,
) -> R:
    """Poll ``api_endpoint`` until the task reports success or failure.

    Completion and failure are detected from the response's ``state`` value,
    compared against ``TaskStatus.success`` and ``TaskStatus.failed``.

    Args:
        auth_kwargs: Auth values forwarded to the polling operation.
        api_endpoint: Endpoint queried on every poll.
        result_url_extractor: Optional callable forwarded to the operation.
        estimated_duration: Optional estimate forwarded to the operation.
        node_id: Optional node id forwarded to the operation.

    Returns:
        Whatever the polling operation's ``execute()`` returns.
    """
    def _state_of(response):
        return response.state.value

    operation = PollingOperation(
        poll_endpoint=api_endpoint,
        completed_statuses=[TaskStatus.success.value],
        failed_statuses=[TaskStatus.failed.value],
        status_extractor=_state_of,
        auth_kwargs=auth_kwargs,
        result_url_extractor=result_url_extractor,
        estimated_duration=estimated_duration,
        node_id=node_id,
        # Interval is presumably in seconds — TODO confirm against PollingOperation.
        poll_interval=16.0,
        max_poll_attempts=256,
    )
    return await operation.execute()
|
||||||
|
|
||||||
|
|
||||||
|
def get_video_url_from_response(response) -> Optional[str]:
    """Return the URL of the first creation in ``response``, or ``None`` if it has none."""
    creations = response.creations
    return creations[0].url if creations else None
|
||||||
|
|
||||||
|
|
||||||
|
def get_video_from_response(response) -> "TaskResult":
    """Return the first creation of a finished Vidu task.

    Args:
        response: Status response exposing ``creations``, ``state`` and
            ``err_code``.

    Returns:
        The first entry of ``response.creations``.

    Raises:
        RuntimeError: If ``response.creations`` is empty or missing.
    """
    if not response.creations:
        error_msg = f"Vidu request does not contain results. State: {response.state}, Error Code: {response.err_code}"
        # This is a failure path, so log it at ERROR level like the other
        # failure in this module instead of INFO.
        logging.error(error_msg)
        raise RuntimeError(error_msg)
    first_creation = response.creations[0]
    logging.info("Vidu task %s succeeded. Video URL: %s", first_creation.id, first_creation.url)
    return first_creation
|
||||||
|
|
||||||
|
|
||||||
|
async def execute_task(
    vidu_endpoint: str,
    auth_kwargs: Optional[dict[str, str]],
    payload: TaskCreationRequest,
    estimated_duration: int,
    node_id: Optional[str],
) -> "TaskStatusResponse":
    """Create a Vidu task and poll it until it finishes.

    Args:
        vidu_endpoint: Proxy route the creation request is posted to.
        auth_kwargs: Auth values forwarded to both operations.
        payload: Request body for the creation call.
        estimated_duration: Estimate forwarded to the polling helper.
        node_id: Node id forwarded to the polling helper.

    Returns:
        The result of ``poll_until_finished`` for the task-status route,
        whose response model is ``TaskStatusResponse``.

    Raises:
        RuntimeError: If the creation response already reports a failed state.
    """
    creation_endpoint = ApiEndpoint(
        path=vidu_endpoint,
        method=HttpMethod.POST,
        request_model=TaskCreationRequest,
        response_model=TaskCreationResponse,
    )
    response = await SynchronousOperation(
        endpoint=creation_endpoint,
        request=payload,
        auth_kwargs=auth_kwargs,
    ).execute()

    # Fail fast when the task is rejected at creation time.
    if response.state == TaskStatus.failed:
        error_msg = f"Vidu request failed. Code: {response.code}"
        logging.error(error_msg)
        raise RuntimeError(error_msg)

    status_endpoint = ApiEndpoint(
        path=VIDU_GET_GENERATION_STATUS % response.task_id,
        method=HttpMethod.GET,
        request_model=EmptyRequest,
        response_model=TaskStatusResponse,
    )
    return await poll_until_finished(
        auth_kwargs,
        status_endpoint,
        result_url_extractor=get_video_url_from_response,
        estimated_duration=estimated_duration,
        node_id=node_id,
    )
|
||||||
|
|
||||||
|
|
||||||
|
class ViduTextToVideoNode(comfy_io.ComfyNode):
    # API node: builds a Vidu text-to-video request from the node inputs,
    # runs it through execute_task, and returns the downloaded video.

    @classmethod
    def define_schema(cls):
        """Describe the node's inputs, outputs and hidden values."""
        return comfy_io.Schema(
            node_id="ViduTextToVideoNode",
            display_name="Vidu Text To Video Generation",
            category="api node/video/Vidu",
            description="Generate video from text prompt",
            inputs=[
                comfy_io.Combo.Input(
                    "model",
                    options=[model.value for model in VideoModelName],
                    default=VideoModelName.vidu_q1.value,
                    tooltip="Model name",
                ),
                comfy_io.String.Input(
                    "prompt",
                    multiline=True,
                    tooltip="A textual description for video generation",
                ),
                comfy_io.Int.Input(
                    "duration",
                    default=5,
                    min=5,
                    max=5,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    tooltip="Duration of the output video in seconds",
                    optional=True,
                ),
                comfy_io.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=2147483647,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    control_after_generate=True,
                    tooltip="Seed for video generation (0 for random)",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "aspect_ratio",
                    options=[model.value for model in AspectRatio],
                    default=AspectRatio.r_16_9.value,
                    tooltip="The aspect ratio of the output video",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "resolution",
                    options=[model.value for model in Resolution],
                    default=Resolution.r_1080p.value,
                    tooltip="Supported values may vary by model & duration",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "movement_amplitude",
                    options=[model.value for model in MovementAmplitude],
                    default=MovementAmplitude.auto.value,
                    tooltip="The movement amplitude of objects in the frame",
                    optional=True,
                ),
            ],
            outputs=[
                comfy_io.Video.Output(),
            ],
            hidden=[
                comfy_io.Hidden.auth_token_comfy_org,
                comfy_io.Hidden.api_key_comfy_org,
                comfy_io.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        model: str,
        prompt: str,
        duration: int,
        seed: int,
        aspect_ratio: str,
        resolution: str,
        movement_amplitude: str,
    ) -> comfy_io.NodeOutput:
        """Run a text-to-video task and return the generated video.

        Raises:
            ValueError: If ``prompt`` is empty.
        """
        if not prompt:
            raise ValueError("The prompt field is required and cannot be empty.")
        payload = TaskCreationRequest(
            # The request field is ``model``; the previous ``model_name``
            # keyword matched no field, so the selected model never reached
            # the request and the default was always used.
            model=model,
            prompt=prompt,
            duration=duration,
            seed=seed,
            aspect_ratio=aspect_ratio,
            resolution=resolution,
            movement_amplitude=movement_amplitude,
        )
        auth = {
            "auth_token": cls.hidden.auth_token_comfy_org,
            "comfy_api_key": cls.hidden.api_key_comfy_org,
        }
        results = await execute_task(VIDU_TEXT_TO_VIDEO, auth, payload, 320, cls.hidden.unique_id)
        return comfy_io.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
|
||||||
|
|
||||||
|
|
||||||
|
class ViduImageToVideoNode(comfy_io.ComfyNode):
    # API node: builds a Vidu image-to-video request from a single start
    # frame plus optional prompt, runs it through execute_task, and returns
    # the downloaded video.

    @classmethod
    def define_schema(cls):
        """Describe the node's inputs, outputs and hidden values."""
        return comfy_io.Schema(
            node_id="ViduImageToVideoNode",
            display_name="Vidu Image To Video Generation",
            category="api node/video/Vidu",
            description="Generate video from image and optional prompt",
            inputs=[
                comfy_io.Combo.Input(
                    "model",
                    options=[model.value for model in VideoModelName],
                    default=VideoModelName.vidu_q1.value,
                    tooltip="Model name",
                ),
                comfy_io.Image.Input(
                    "image",
                    tooltip="An image to be used as the start frame of the generated video",
                ),
                comfy_io.String.Input(
                    "prompt",
                    multiline=True,
                    default="",
                    tooltip="A textual description for video generation",
                    optional=True,
                ),
                comfy_io.Int.Input(
                    "duration",
                    default=5,
                    min=5,
                    max=5,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    tooltip="Duration of the output video in seconds",
                    optional=True,
                ),
                comfy_io.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=2147483647,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    control_after_generate=True,
                    tooltip="Seed for video generation (0 for random)",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "resolution",
                    options=[model.value for model in Resolution],
                    default=Resolution.r_1080p.value,
                    tooltip="Supported values may vary by model & duration",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "movement_amplitude",
                    options=[model.value for model in MovementAmplitude],
                    default=MovementAmplitude.auto.value,
                    tooltip="The movement amplitude of objects in the frame",
                    optional=True,
                ),
            ],
            outputs=[
                comfy_io.Video.Output(),
            ],
            hidden=[
                comfy_io.Hidden.auth_token_comfy_org,
                comfy_io.Hidden.api_key_comfy_org,
                comfy_io.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        model: str,
        image: torch.Tensor,
        prompt: str,
        duration: int,
        seed: int,
        resolution: str,
        movement_amplitude: str,
    ) -> comfy_io.NodeOutput:
        """Run an image-to-video task and return the generated video.

        Raises:
            ValueError: If more than one input image is supplied.
        """
        if get_number_of_images(image) > 1:
            raise ValueError("Only one input image is allowed.")
        validate_image_aspect_ratio_range(image, (1, 4), (4, 1))
        payload = TaskCreationRequest(
            # The request field is ``model``; the previous ``model_name``
            # keyword matched no field, so the selected model never reached
            # the request and the default was always used.
            model=model,
            prompt=prompt,
            duration=duration,
            seed=seed,
            resolution=resolution,
            movement_amplitude=movement_amplitude,
        )
        auth = {
            "auth_token": cls.hidden.auth_token_comfy_org,
            "comfy_api_key": cls.hidden.api_key_comfy_org,
        }
        # Upload the start frame and attach the result to the request.
        payload.images = await upload_images_to_comfyapi(
            image,
            max_images=1,
            mime_type="image/png",
            auth_kwargs=auth,
        )
        results = await execute_task(VIDU_IMAGE_TO_VIDEO, auth, payload, 120, cls.hidden.unique_id)
        return comfy_io.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
|
||||||
|
|
||||||
|
|
||||||
|
class ViduReferenceVideoNode(comfy_io.ComfyNode):
    """API node that requests a Vidu video generated from reference images and a prompt."""

    @classmethod
    def define_schema(cls):
        """Return the schema (inputs, outputs, hidden values) describing this node."""
        # Inputs are listed in the same order as the parameters of `execute`.
        inputs = [
            comfy_io.Combo.Input(
                "model",
                options=[member.value for member in VideoModelName],
                default=VideoModelName.vidu_q1.value,
                tooltip="Model name",
            ),
            comfy_io.Image.Input(
                "images",
                tooltip="Images to use as references to generate a video with consistent subjects (max 7 images).",
            ),
            comfy_io.String.Input(
                "prompt",
                multiline=True,
                tooltip="A textual description for video generation",
            ),
            # min == max == 5, so the schema only admits a duration of 5.
            comfy_io.Int.Input(
                "duration",
                default=5,
                min=5,
                max=5,
                step=1,
                display_mode=comfy_io.NumberDisplay.number,
                tooltip="Duration of the output video in seconds",
                optional=True,
            ),
            comfy_io.Int.Input(
                "seed",
                default=0,
                min=0,
                max=2147483647,
                step=1,
                display_mode=comfy_io.NumberDisplay.number,
                control_after_generate=True,
                tooltip="Seed for video generation (0 for random)",
                optional=True,
            ),
            comfy_io.Combo.Input(
                "aspect_ratio",
                options=[member.value for member in AspectRatio],
                default=AspectRatio.r_16_9.value,
                tooltip="The aspect ratio of the output video",
                optional=True,
            ),
            comfy_io.Combo.Input(
                "resolution",
                options=[member.value for member in Resolution],
                default=Resolution.r_1080p.value,
                tooltip="Supported values may vary by model & duration",
                optional=True,
            ),
            comfy_io.Combo.Input(
                "movement_amplitude",
                options=[member.value for member in MovementAmplitude],
                default=MovementAmplitude.auto.value,
                tooltip="The movement amplitude of objects in the frame",
                optional=True,
            ),
        ]
        return comfy_io.Schema(
            node_id="ViduReferenceVideoNode",
            display_name="Vidu Reference To Video Generation",
            category="api node/video/Vidu",
            description="Generate video from multiple images and prompt",
            inputs=inputs,
            outputs=[
                comfy_io.Video.Output(),
            ],
            hidden=[
                comfy_io.Hidden.auth_token_comfy_org,
                comfy_io.Hidden.api_key_comfy_org,
                comfy_io.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        model: str,
        images: torch.Tensor,
        prompt: str,
        duration: int,
        seed: int,
        aspect_ratio: str,
        resolution: str,
        movement_amplitude: str,
    ) -> comfy_io.NodeOutput:
        """Validate the inputs, upload the reference images and run the generation task.

        Raises:
            ValueError: If ``prompt`` is empty or more than 7 images are supplied.
                The per-image validators are also called here and may raise.
        """
        if not prompt:
            raise ValueError("The prompt field is required and cannot be empty.")
        image_count = get_number_of_images(images)
        if image_count > 7:
            raise ValueError("Too many images, maximum allowed is 7.")
        # Each reference is checked on its own.
        # NOTE(review): assumes iterating `images` yields one image per step -- confirm.
        for reference in images:
            validate_image_aspect_ratio_range(reference, (1, 4), (4, 1))
            validate_image_dimensions(reference, min_width=128, min_height=128)

        payload = TaskCreationRequest(
            model_name=model,
            prompt=prompt,
            duration=duration,
            seed=seed,
            aspect_ratio=aspect_ratio,
            resolution=resolution,
            movement_amplitude=movement_amplitude,
        )
        auth = {
            "auth_token": cls.hidden.auth_token_comfy_org,
            "comfy_api_key": cls.hidden.api_key_comfy_org,
        }
        payload.images = await upload_images_to_comfyapi(
            images,
            max_images=7,
            mime_type="image/png",
            auth_kwargs=auth,
        )
        # NOTE(review): 120 is handed to execute_task as-is; presumably an estimated duration -- confirm.
        results = await execute_task(VIDU_REFERENCE_VIDEO, auth, payload, 120, cls.hidden.unique_id)
        video_url = get_video_from_response(results).url
        video = await download_url_to_video_output(video_url)
        return comfy_io.NodeOutput(video)
|
||||||
|
|
||||||
|
|
||||||
|
class ViduStartEndToVideoNode(comfy_io.ComfyNode):
    """API node that requests a Vidu video generated between a start frame and an end frame."""

    @classmethod
    def define_schema(cls):
        """Return the schema (inputs, outputs, hidden values) describing this node."""
        # Inputs are listed in the same order as the parameters of `execute`.
        inputs = [
            comfy_io.Combo.Input(
                "model",
                options=[member.value for member in VideoModelName],
                default=VideoModelName.vidu_q1.value,
                tooltip="Model name",
            ),
            comfy_io.Image.Input(
                "first_frame",
                tooltip="Start frame",
            ),
            comfy_io.Image.Input(
                "end_frame",
                tooltip="End frame",
            ),
            comfy_io.String.Input(
                "prompt",
                multiline=True,
                tooltip="A textual description for video generation",
                optional=True,
            ),
            # min == max == 5, so the schema only admits a duration of 5.
            comfy_io.Int.Input(
                "duration",
                default=5,
                min=5,
                max=5,
                step=1,
                display_mode=comfy_io.NumberDisplay.number,
                tooltip="Duration of the output video in seconds",
                optional=True,
            ),
            comfy_io.Int.Input(
                "seed",
                default=0,
                min=0,
                max=2147483647,
                step=1,
                display_mode=comfy_io.NumberDisplay.number,
                control_after_generate=True,
                tooltip="Seed for video generation (0 for random)",
                optional=True,
            ),
            comfy_io.Combo.Input(
                "resolution",
                options=[member.value for member in Resolution],
                default=Resolution.r_1080p.value,
                tooltip="Supported values may vary by model & duration",
                optional=True,
            ),
            comfy_io.Combo.Input(
                "movement_amplitude",
                options=[member.value for member in MovementAmplitude],
                default=MovementAmplitude.auto.value,
                tooltip="The movement amplitude of objects in the frame",
                optional=True,
            ),
        ]
        return comfy_io.Schema(
            node_id="ViduStartEndToVideoNode",
            display_name="Vidu Start End To Video Generation",
            category="api node/video/Vidu",
            description="Generate a video from start and end frames and a prompt",
            inputs=inputs,
            outputs=[
                comfy_io.Video.Output(),
            ],
            hidden=[
                comfy_io.Hidden.auth_token_comfy_org,
                comfy_io.Hidden.api_key_comfy_org,
                comfy_io.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        model: str,
        first_frame: torch.Tensor,
        end_frame: torch.Tensor,
        prompt: str,
        duration: int,
        seed: int,
        resolution: str,
        movement_amplitude: str,
    ) -> comfy_io.NodeOutput:
        """Check that both frames have similar aspect ratios, upload them and run the task.

        The two frames are uploaded one after the other, start frame first, and
        the first uploaded reference of each call is stored on the payload.
        """
        validate_aspect_ratio_closeness(first_frame, end_frame, min_rel=0.8, max_rel=1.25, strict=False)

        payload = TaskCreationRequest(
            model_name=model,
            prompt=prompt,
            duration=duration,
            seed=seed,
            resolution=resolution,
            movement_amplitude=movement_amplitude,
        )
        auth = {
            "auth_token": cls.hidden.auth_token_comfy_org,
            "comfy_api_key": cls.hidden.api_key_comfy_org,
        }

        uploaded_frames = []
        for frame in (first_frame, end_frame):
            frame_refs = await upload_images_to_comfyapi(frame, max_images=1, mime_type="image/png", auth_kwargs=auth)
            uploaded_frames.append(frame_refs[0])
        payload.images = uploaded_frames

        # NOTE(review): 96 is handed to execute_task as-is; presumably an estimated duration -- confirm.
        results = await execute_task(VIDU_START_END_VIDEO, auth, payload, 96, cls.hidden.unique_id)
        video_url = get_video_from_response(results).url
        video = await download_url_to_video_output(video_url)
        return comfy_io.NodeOutput(video)
|
||||||
|
|
||||||
|
|
||||||
|
class ViduExtension(ComfyExtension):
    """Extension object that exposes the Vidu API node classes."""

    @override
    async def get_node_list(self) -> list[type[comfy_io.ComfyNode]]:
        """Return the four Vidu node classes provided by this module."""
        node_classes = [
            ViduTextToVideoNode,
            ViduImageToVideoNode,
            ViduReferenceVideoNode,
            ViduStartEndToVideoNode,
        ]
        return node_classes
|
||||||
|
|
||||||
|
async def comfy_entrypoint() -> ViduExtension:
    """Create and return a new ``ViduExtension`` instance.

    NOTE(review): presumably the hook the ComfyUI loader looks up in this module -- confirm.
    """
    extension = ViduExtension()
    return extension
|
||||||
@ -53,6 +53,53 @@ def validate_image_aspect_ratio(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def validate_image_aspect_ratio_range(
    image: torch.Tensor,
    min_ratio: tuple[float, float],  # e.g. (1, 4)
    max_ratio: tuple[float, float],  # e.g. (4, 1)
    *,
    strict: bool = True,  # True -> (min, max); False -> [min, max]
) -> float:
    """Check that the width/height ratio of ``image`` lies between two ratios.

    Args:
        image: Image whose dimensions are read via ``get_image_dimensions``.
        min_ratio: ``(numerator, denominator)`` pair for one bound.
        max_ratio: ``(numerator, denominator)`` pair for the other bound.
            If the bounds are given in the wrong order they are swapped.
        strict: When true the bounds are exclusive, otherwise inclusive.

    Returns:
        The aspect ratio ``width / height`` of the image.

    Raises:
        ValueError: If any ratio component is not positive, if the image
            reports a non-positive dimension, or if the aspect ratio falls
            outside the allowed range.
    """
    lo_num, lo_den = min_ratio
    hi_num, hi_den = max_ratio
    if any(part <= 0 for part in (lo_num, lo_den, hi_num, hi_den)):
        raise ValueError("Ratios must be positive, like (1, 4) or (4, 1).")

    lower = lo_num / lo_den
    upper = hi_num / hi_den
    if lower > upper:
        lower, upper = upper, lower
        # Swapped too so the error message prints the bounds in ascending order.
        lo_num, lo_den, hi_num, hi_den = hi_num, hi_den, lo_num, lo_den

    width, height = get_image_dimensions(image)
    if width <= 0 or height <= 0:
        raise ValueError(f"Invalid image dimensions: {width}x{height}")

    ratio = width / height
    if strict:
        within = lower < ratio < upper
    else:
        within = lower <= ratio <= upper
    if within:
        return ratio

    symbol = "<" if strict else "≤"
    raise ValueError(
        f"Image aspect ratio {ratio:.6g} is outside allowed range: "
        f"{lo_num}:{lo_den} {symbol} ratio {symbol} {hi_num}:{hi_den}"
    )
|
||||||
|
|
||||||
|
|
||||||
|
def validate_aspect_ratio_closeness(
    start_img,
    end_img,
    min_rel: float,
    max_rel: float,
    *,
    strict: bool = False,  # True => exclusive, False => inclusive
) -> None:
    """Check that two images have aspect ratios that are close to each other.

    The comparison is symmetric: the larger aspect ratio is divided by the
    smaller one and the result is compared against a single limit, namely the
    larger of ``max_rel`` and ``1 / min_rel``.

    Args:
        start_img: First image; dimensions are read via ``get_image_dimensions``.
        end_img: Second image; dimensions are read the same way.
        min_rel: Lower relative bound (e.g. ``0.8``); must be positive.
        max_rel: Upper relative bound (e.g. ``1.25``); must be positive.
        strict: When true, a ratio equal to the limit is rejected as well.

    Raises:
        ValueError: If a bound is not positive, if an image reports a
            non-positive dimension, or if the aspect ratios are too far apart.
    """
    # Reject bad bounds up front: min_rel == 0 would otherwise fail with a bare
    # ZeroDivisionError below, and negative bounds would be silently ignored.
    if min_rel <= 0 or max_rel <= 0:
        raise ValueError("min_rel and max_rel must be positive, like 0.8 and 1.25.")

    w1, h1 = get_image_dimensions(start_img)
    w2, h2 = get_image_dimensions(end_img)
    if min(w1, h1, w2, h2) <= 0:
        raise ValueError("Invalid image dimensions")

    ar1 = w1 / h1
    ar2 = w2 / h2
    # Normalize so it is symmetric (no need to check both ar1/ar2 and ar2/ar1)
    closeness = max(ar1, ar2) / min(ar1, ar2)
    limit = max(max_rel, 1.0 / min_rel)  # for 0.8..1.25 this is 1.25

    too_far = (closeness >= limit) if strict else (closeness > limit)
    if too_far:
        raise ValueError(f"Aspect ratios must be close: start/end={ar1/ar2:.4f}, allowed range {min_rel}–{max_rel}.")
|
||||||
|
|
||||||
|
|
||||||
def validate_video_dimensions(
|
def validate_video_dimensions(
|
||||||
video: VideoInput,
|
video: VideoInput,
|
||||||
min_width: Optional[int] = None,
|
min_width: Optional[int] = None,
|
||||||
@ -98,3 +145,9 @@ def validate_video_duration(
|
|||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"Video duration must be at most {max_duration}s, got {duration}s"
|
f"Video duration must be at most {max_duration}s, got {duration}s"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def get_number_of_images(images):
    """Return how many images ``images`` holds.

    A ``torch.Tensor`` with four or more dimensions reports the size of its
    first dimension; a tensor with fewer dimensions counts as one image. Any
    other object is measured with ``len``.
    """
    if not isinstance(images, torch.Tensor):
        return len(images)
    if images.ndim < 4:
        return 1
    return images.shape[0]
|
||||||
|
|||||||
@ -1,25 +1,6 @@
|
|||||||
import node_helpers
|
import node_helpers
|
||||||
import comfy.utils
|
import comfy.utils
|
||||||
|
import math
|
||||||
PREFERRED_QWENIMAGE_RESOLUTIONS = [
|
|
||||||
(672, 1568),
|
|
||||||
(688, 1504),
|
|
||||||
(720, 1456),
|
|
||||||
(752, 1392),
|
|
||||||
(800, 1328),
|
|
||||||
(832, 1248),
|
|
||||||
(880, 1184),
|
|
||||||
(944, 1104),
|
|
||||||
(1024, 1024),
|
|
||||||
(1104, 944),
|
|
||||||
(1184, 880),
|
|
||||||
(1248, 832),
|
|
||||||
(1328, 800),
|
|
||||||
(1392, 752),
|
|
||||||
(1456, 720),
|
|
||||||
(1504, 688),
|
|
||||||
(1568, 672),
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
class TextEncodeQwenImageEdit:
|
class TextEncodeQwenImageEdit:
|
||||||
@ -42,13 +23,17 @@ class TextEncodeQwenImageEdit:
|
|||||||
if image is None:
|
if image is None:
|
||||||
images = []
|
images = []
|
||||||
else:
|
else:
|
||||||
images = [image]
|
samples = image.movedim(-1, 1)
|
||||||
|
total = int(1024 * 1024)
|
||||||
|
|
||||||
|
scale_by = math.sqrt(total / (samples.shape[3] * samples.shape[2]))
|
||||||
|
width = round(samples.shape[3] * scale_by)
|
||||||
|
height = round(samples.shape[2] * scale_by)
|
||||||
|
|
||||||
|
s = comfy.utils.common_upscale(samples, width, height, "area", "disabled")
|
||||||
|
image = s.movedim(1, -1)
|
||||||
|
images = [image[:, :, :, :3]]
|
||||||
if vae is not None:
|
if vae is not None:
|
||||||
width = image.shape[2]
|
|
||||||
height = image.shape[1]
|
|
||||||
aspect_ratio = width / height
|
|
||||||
_, width, height = min((abs(aspect_ratio - w / h), w, h) for w, h in PREFERRED_QWENIMAGE_RESOLUTIONS)
|
|
||||||
image = comfy.utils.common_upscale(image.movedim(-1, 1), width, height, "lanczos", "center").movedim(1, -1)
|
|
||||||
ref_latent = vae.encode(image[:, :, :, :3])
|
ref_latent = vae.encode(image[:, :, :, :3])
|
||||||
|
|
||||||
tokens = clip.tokenize(prompt, images=images)
|
tokens = clip.tokenize(prompt, images=images)
|
||||||
|
|||||||
1
nodes.py
1
nodes.py
@ -2360,6 +2360,7 @@ async def init_builtin_api_nodes():
|
|||||||
"nodes_moonvalley.py",
|
"nodes_moonvalley.py",
|
||||||
"nodes_rodin.py",
|
"nodes_rodin.py",
|
||||||
"nodes_gemini.py",
|
"nodes_gemini.py",
|
||||||
|
"nodes_vidu.py",
|
||||||
]
|
]
|
||||||
|
|
||||||
if not await load_custom_node(os.path.join(api_nodes_dir, "canary.py"), module_parent="comfy_api_nodes"):
|
if not await load_custom_node(os.path.join(api_nodes_dir, "canary.py"), module_parent="comfy_api_nodes"):
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user