mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2025-12-15 01:07:03 +08:00
feat(api-nodes): new TextToVideoWithAudio and ImageToVideoWithAudio nodes (#11267)
Some checks are pending
Python Linting / Run Ruff (push) Waiting to run
Python Linting / Run Pylint (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Waiting to run
Execution Tests / test (macos-latest) (push) Waiting to run
Execution Tests / test (ubuntu-latest) (push) Waiting to run
Execution Tests / test (windows-latest) (push) Waiting to run
Test server launches without errors / test (push) Waiting to run
Unit Tests / test (macos-latest) (push) Waiting to run
Unit Tests / test (ubuntu-latest) (push) Waiting to run
Unit Tests / test (windows-2022) (push) Waiting to run
Some checks are pending
Python Linting / Run Ruff (push) Waiting to run
Python Linting / Run Pylint (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Waiting to run
Execution Tests / test (macos-latest) (push) Waiting to run
Execution Tests / test (ubuntu-latest) (push) Waiting to run
Execution Tests / test (windows-latest) (push) Waiting to run
Test server launches without errors / test (push) Waiting to run
Unit Tests / test (macos-latest) (push) Waiting to run
Unit Tests / test (ubuntu-latest) (push) Waiting to run
Unit Tests / test (windows-2022) (push) Waiting to run
This commit is contained in:
parent
5495589db3
commit
908fd7d749
@ -51,25 +51,25 @@ class TaskStatusImageResult(BaseModel):
|
||||
url: str = Field(..., description="URL for generated image")
|
||||
|
||||
|
||||
class OmniTaskStatusResults(BaseModel):
|
||||
class TaskStatusResults(BaseModel):
|
||||
videos: list[TaskStatusVideoResult] | None = Field(None)
|
||||
images: list[TaskStatusImageResult] | None = Field(None)
|
||||
|
||||
|
||||
class OmniTaskStatusResponseData(BaseModel):
|
||||
class TaskStatusResponseData(BaseModel):
|
||||
created_at: int | None = Field(None, description="Task creation time")
|
||||
updated_at: int | None = Field(None, description="Task update time")
|
||||
task_status: str | None = None
|
||||
task_status_msg: str | None = Field(None, description="Additional failure reason. Only for polling endpoint.")
|
||||
task_id: str | None = Field(None, description="Task ID")
|
||||
task_result: OmniTaskStatusResults | None = Field(None)
|
||||
task_result: TaskStatusResults | None = Field(None)
|
||||
|
||||
|
||||
class OmniTaskStatusResponse(BaseModel):
|
||||
class TaskStatusResponse(BaseModel):
|
||||
code: int | None = Field(None, description="Error code")
|
||||
message: str | None = Field(None, description="Error message")
|
||||
request_id: str | None = Field(None, description="Request ID")
|
||||
data: OmniTaskStatusResponseData | None = Field(None)
|
||||
data: TaskStatusResponseData | None = Field(None)
|
||||
|
||||
|
||||
class OmniImageParamImage(BaseModel):
|
||||
@ -84,3 +84,21 @@ class OmniProImageRequest(BaseModel):
|
||||
mode: str = Field("pro")
|
||||
n: int | None = Field(1, le=9)
|
||||
image_list: list[OmniImageParamImage] | None = Field(..., max_length=10)
|
||||
|
||||
|
||||
class TextToVideoWithAudioRequest(BaseModel):
|
||||
model_name: str = Field(..., description="kling-v2-6")
|
||||
aspect_ratio: str = Field(..., description="'16:9', '9:16' or '1:1'")
|
||||
duration: str = Field(..., description="'5' or '10'")
|
||||
prompt: str = Field(...)
|
||||
mode: str = Field("pro")
|
||||
sound: str = Field(..., description="'on' or 'off'")
|
||||
|
||||
|
||||
class ImageToVideoWithAudioRequest(BaseModel):
|
||||
model_name: str = Field(..., description="kling-v2-6")
|
||||
image: str = Field(...)
|
||||
duration: str = Field(..., description="'5' or '10'")
|
||||
prompt: str = Field(...)
|
||||
mode: str = Field("pro")
|
||||
sound: str = Field(..., description="'on' or 'off'")
|
||||
|
||||
@ -50,6 +50,7 @@ from comfy_api_nodes.apis import (
|
||||
KlingSingleImageEffectModelName,
|
||||
)
|
||||
from comfy_api_nodes.apis.kling_api import (
|
||||
ImageToVideoWithAudioRequest,
|
||||
OmniImageParamImage,
|
||||
OmniParamImage,
|
||||
OmniParamVideo,
|
||||
@ -57,7 +58,8 @@ from comfy_api_nodes.apis.kling_api import (
|
||||
OmniProImageRequest,
|
||||
OmniProReferences2VideoRequest,
|
||||
OmniProText2VideoRequest,
|
||||
OmniTaskStatusResponse,
|
||||
TaskStatusResponse,
|
||||
TextToVideoWithAudioRequest,
|
||||
)
|
||||
from comfy_api_nodes.util import (
|
||||
ApiEndpoint,
|
||||
@ -242,7 +244,7 @@ def normalize_omni_prompt_references(prompt: str) -> str:
|
||||
return re.sub(r"(?<!\w)@video(?P<idx>\d*)(?!\w)", _video_repl, prompt)
|
||||
|
||||
|
||||
async def finish_omni_video_task(cls: type[IO.ComfyNode], response: OmniTaskStatusResponse) -> IO.NodeOutput:
|
||||
async def finish_omni_video_task(cls: type[IO.ComfyNode], response: TaskStatusResponse) -> IO.NodeOutput:
|
||||
if response.code:
|
||||
raise RuntimeError(
|
||||
f"Kling request failed. Code: {response.code}, Message: {response.message}, Data: {response.data}"
|
||||
@ -250,7 +252,7 @@ async def finish_omni_video_task(cls: type[IO.ComfyNode], response: OmniTaskStat
|
||||
final_response = await poll_op(
|
||||
cls,
|
||||
ApiEndpoint(path=f"/proxy/kling/v1/videos/omni-video/{response.data.task_id}"),
|
||||
response_model=OmniTaskStatusResponse,
|
||||
response_model=TaskStatusResponse,
|
||||
status_extractor=lambda r: (r.data.task_status if r.data else None),
|
||||
max_poll_attempts=160,
|
||||
)
|
||||
@ -834,7 +836,7 @@ class OmniProTextToVideoNode(IO.ComfyNode):
|
||||
response = await sync_op(
|
||||
cls,
|
||||
ApiEndpoint(path="/proxy/kling/v1/videos/omni-video", method="POST"),
|
||||
response_model=OmniTaskStatusResponse,
|
||||
response_model=TaskStatusResponse,
|
||||
data=OmniProText2VideoRequest(
|
||||
model_name=model_name,
|
||||
prompt=prompt,
|
||||
@ -929,7 +931,7 @@ class OmniProFirstLastFrameNode(IO.ComfyNode):
|
||||
response = await sync_op(
|
||||
cls,
|
||||
ApiEndpoint(path="/proxy/kling/v1/videos/omni-video", method="POST"),
|
||||
response_model=OmniTaskStatusResponse,
|
||||
response_model=TaskStatusResponse,
|
||||
data=OmniProFirstLastFrameRequest(
|
||||
model_name=model_name,
|
||||
prompt=prompt,
|
||||
@ -997,7 +999,7 @@ class OmniProImageToVideoNode(IO.ComfyNode):
|
||||
response = await sync_op(
|
||||
cls,
|
||||
ApiEndpoint(path="/proxy/kling/v1/videos/omni-video", method="POST"),
|
||||
response_model=OmniTaskStatusResponse,
|
||||
response_model=TaskStatusResponse,
|
||||
data=OmniProReferences2VideoRequest(
|
||||
model_name=model_name,
|
||||
prompt=prompt,
|
||||
@ -1081,7 +1083,7 @@ class OmniProVideoToVideoNode(IO.ComfyNode):
|
||||
response = await sync_op(
|
||||
cls,
|
||||
ApiEndpoint(path="/proxy/kling/v1/videos/omni-video", method="POST"),
|
||||
response_model=OmniTaskStatusResponse,
|
||||
response_model=TaskStatusResponse,
|
||||
data=OmniProReferences2VideoRequest(
|
||||
model_name=model_name,
|
||||
prompt=prompt,
|
||||
@ -1162,7 +1164,7 @@ class OmniProEditVideoNode(IO.ComfyNode):
|
||||
response = await sync_op(
|
||||
cls,
|
||||
ApiEndpoint(path="/proxy/kling/v1/videos/omni-video", method="POST"),
|
||||
response_model=OmniTaskStatusResponse,
|
||||
response_model=TaskStatusResponse,
|
||||
data=OmniProReferences2VideoRequest(
|
||||
model_name=model_name,
|
||||
prompt=prompt,
|
||||
@ -1237,7 +1239,7 @@ class OmniProImageNode(IO.ComfyNode):
|
||||
response = await sync_op(
|
||||
cls,
|
||||
ApiEndpoint(path="/proxy/kling/v1/images/omni-image", method="POST"),
|
||||
response_model=OmniTaskStatusResponse,
|
||||
response_model=TaskStatusResponse,
|
||||
data=OmniProImageRequest(
|
||||
model_name=model_name,
|
||||
prompt=prompt,
|
||||
@ -1253,7 +1255,7 @@ class OmniProImageNode(IO.ComfyNode):
|
||||
final_response = await poll_op(
|
||||
cls,
|
||||
ApiEndpoint(path=f"/proxy/kling/v1/images/omni-image/{response.data.task_id}"),
|
||||
response_model=OmniTaskStatusResponse,
|
||||
response_model=TaskStatusResponse,
|
||||
status_extractor=lambda r: (r.data.task_status if r.data else None),
|
||||
)
|
||||
return IO.NodeOutput(await download_url_to_image_tensor(final_response.data.task_result.images[0].url))
|
||||
@ -1328,9 +1330,8 @@ class KlingImage2VideoNode(IO.ComfyNode):
|
||||
def define_schema(cls) -> IO.Schema:
|
||||
return IO.Schema(
|
||||
node_id="KlingImage2VideoNode",
|
||||
display_name="Kling Image to Video",
|
||||
display_name="Kling Image(First Frame) to Video",
|
||||
category="api node/video/Kling",
|
||||
description="Kling Image to Video Node",
|
||||
inputs=[
|
||||
IO.Image.Input("start_frame", tooltip="The reference image used to generate the video."),
|
||||
IO.String.Input("prompt", multiline=True, tooltip="Positive text prompt"),
|
||||
@ -2034,6 +2035,136 @@ class KlingImageGenerationNode(IO.ComfyNode):
|
||||
return IO.NodeOutput(await image_result_to_node_output(images))
|
||||
|
||||
|
||||
class TextToVideoWithAudio(IO.ComfyNode):
|
||||
|
||||
@classmethod
|
||||
def define_schema(cls) -> IO.Schema:
|
||||
return IO.Schema(
|
||||
node_id="KlingTextToVideoWithAudio",
|
||||
display_name="Kling Text to Video with Audio",
|
||||
category="api node/video/Kling",
|
||||
inputs=[
|
||||
IO.Combo.Input("model_name", options=["kling-v2-6"]),
|
||||
IO.String.Input("prompt", multiline=True, tooltip="Positive text prompt."),
|
||||
IO.Combo.Input("mode", options=["pro"]),
|
||||
IO.Combo.Input("aspect_ratio", options=["16:9", "9:16", "1:1"]),
|
||||
IO.Combo.Input("duration", options=[5, 10]),
|
||||
IO.Boolean.Input("generate_audio", default=True),
|
||||
],
|
||||
outputs=[
|
||||
IO.Video.Output(),
|
||||
],
|
||||
hidden=[
|
||||
IO.Hidden.auth_token_comfy_org,
|
||||
IO.Hidden.api_key_comfy_org,
|
||||
IO.Hidden.unique_id,
|
||||
],
|
||||
is_api_node=True,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
async def execute(
|
||||
cls,
|
||||
model_name: str,
|
||||
prompt: str,
|
||||
mode: str,
|
||||
aspect_ratio: str,
|
||||
duration: int,
|
||||
generate_audio: bool,
|
||||
) -> IO.NodeOutput:
|
||||
validate_string(prompt, min_length=1, max_length=2500)
|
||||
response = await sync_op(
|
||||
cls,
|
||||
ApiEndpoint(path="/proxy/kling/v1/videos/text2video", method="POST"),
|
||||
response_model=TaskStatusResponse,
|
||||
data=TextToVideoWithAudioRequest(
|
||||
model_name=model_name,
|
||||
prompt=prompt,
|
||||
mode=mode,
|
||||
aspect_ratio=aspect_ratio,
|
||||
duration=str(duration),
|
||||
sound="on" if generate_audio else "off",
|
||||
),
|
||||
)
|
||||
if response.code:
|
||||
raise RuntimeError(
|
||||
f"Kling request failed. Code: {response.code}, Message: {response.message}, Data: {response.data}"
|
||||
)
|
||||
final_response = await poll_op(
|
||||
cls,
|
||||
ApiEndpoint(path=f"/proxy/kling/v1/videos/text2video/{response.data.task_id}"),
|
||||
response_model=TaskStatusResponse,
|
||||
status_extractor=lambda r: (r.data.task_status if r.data else None),
|
||||
)
|
||||
return IO.NodeOutput(await download_url_to_video_output(final_response.data.task_result.videos[0].url))
|
||||
|
||||
|
||||
class ImageToVideoWithAudio(IO.ComfyNode):
|
||||
|
||||
@classmethod
|
||||
def define_schema(cls) -> IO.Schema:
|
||||
return IO.Schema(
|
||||
node_id="KlingImageToVideoWithAudio",
|
||||
display_name="Kling Image(First Frame) to Video with Audio",
|
||||
category="api node/video/Kling",
|
||||
inputs=[
|
||||
IO.Combo.Input("model_name", options=["kling-v2-6"]),
|
||||
IO.Image.Input("start_frame"),
|
||||
IO.String.Input("prompt", multiline=True, tooltip="Positive text prompt."),
|
||||
IO.Combo.Input("mode", options=["pro"]),
|
||||
IO.Combo.Input("duration", options=[5, 10]),
|
||||
IO.Boolean.Input("generate_audio", default=True),
|
||||
],
|
||||
outputs=[
|
||||
IO.Video.Output(),
|
||||
],
|
||||
hidden=[
|
||||
IO.Hidden.auth_token_comfy_org,
|
||||
IO.Hidden.api_key_comfy_org,
|
||||
IO.Hidden.unique_id,
|
||||
],
|
||||
is_api_node=True,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
async def execute(
|
||||
cls,
|
||||
model_name: str,
|
||||
start_frame: Input.Image,
|
||||
prompt: str,
|
||||
mode: str,
|
||||
duration: int,
|
||||
generate_audio: bool,
|
||||
) -> IO.NodeOutput:
|
||||
validate_string(prompt, min_length=1, max_length=2500)
|
||||
validate_image_dimensions(start_frame, min_width=300, min_height=300)
|
||||
validate_image_aspect_ratio(start_frame, (1, 2.5), (2.5, 1))
|
||||
response = await sync_op(
|
||||
cls,
|
||||
ApiEndpoint(path="/proxy/kling/v1/videos/image2video", method="POST"),
|
||||
response_model=TaskStatusResponse,
|
||||
data=ImageToVideoWithAudioRequest(
|
||||
model_name=model_name,
|
||||
image=(await upload_images_to_comfyapi(cls, start_frame))[0],
|
||||
prompt=prompt,
|
||||
mode=mode,
|
||||
duration=str(duration),
|
||||
sound="on" if generate_audio else "off",
|
||||
),
|
||||
)
|
||||
if response.code:
|
||||
raise RuntimeError(
|
||||
f"Kling request failed. Code: {response.code}, Message: {response.message}, Data: {response.data}"
|
||||
)
|
||||
final_response = await poll_op(
|
||||
cls,
|
||||
ApiEndpoint(path=f"/proxy/kling/v1/videos/image2video/{response.data.task_id}"),
|
||||
response_model=TaskStatusResponse,
|
||||
status_extractor=lambda r: (r.data.task_status if r.data else None),
|
||||
)
|
||||
return IO.NodeOutput(await download_url_to_video_output(final_response.data.task_result.videos[0].url))
|
||||
|
||||
|
||||
class KlingExtension(ComfyExtension):
|
||||
@override
|
||||
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
|
||||
@ -2057,6 +2188,8 @@ class KlingExtension(ComfyExtension):
|
||||
OmniProVideoToVideoNode,
|
||||
OmniProEditVideoNode,
|
||||
OmniProImageNode,
|
||||
TextToVideoWithAudio,
|
||||
ImageToVideoWithAudio,
|
||||
]
|
||||
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user