mirror of https://github.com/comfyanonymous/ComfyUI.git
synced 2026-01-23 04:40:15 +08:00

commit de8d67992f: Merge branch 'comfyanonymous:master' into master
@@ -39,6 +39,7 @@ from comfy_api_nodes.apinode_utils import (
     tensor_to_base64_string,
     bytesio_to_image_tensor,
 )
+from comfy_api.util import VideoContainer, VideoCodec


 GEMINI_BASE_ENDPOINT = "/proxy/vertexai/gemini"
@@ -310,7 +311,7 @@ class GeminiNode(ComfyNodeABC):
         Returns:
             List of GeminiPart objects containing the encoded video.
         """
-        from comfy_api.util import VideoContainer, VideoCodec
+
         base_64_string = video_to_base64_string(
             video_input,
             container_format=VideoContainer.MP4,
@@ -712,6 +712,9 @@ class KlingImage2VideoNode(KlingNodeBase):
             # Camera control type for image 2 video is always `simple`
             camera_control.type = KlingCameraControlType.simple

+        if mode == "std" and model_name == KlingVideoGenModelName.kling_v2_5_turbo.value:
+            mode = "pro" # October 5: currently "std" mode is not supported for this model
+
         initial_operation = SynchronousOperation(
             endpoint=ApiEndpoint(
                 path=PATH_IMAGE_TO_VIDEO,
@@ -2,11 +2,7 @@ import logging
 from typing import Any, Callable, Optional, TypeVar

 import torch
 from typing_extensions import override
-from comfy_api_nodes.util.validation_utils import (
-    get_image_dimensions,
-    validate_image_dimensions,
-)
-
+from comfy_api_nodes.util.validation_utils import validate_image_dimensions

 from comfy_api_nodes.apis import (
     MoonvalleyTextToVideoRequest,
@@ -132,47 +128,6 @@ def validate_prompts(
     return True


-def validate_input_media(width, height, with_frame_conditioning, num_frames_in=None):
-    # inference validation
-    # T = num_frames
-    # in all cases, the following must be true: T divisible by 16 and H,W by 8. in addition...
-    # with image conditioning: H*W must be divisible by 8192
-    # without image conditioning: T divisible by 32
-    if num_frames_in and not num_frames_in % 16 == 0:
-        return False, ("The input video total frame count must be divisible by 16!")
-
-    if height % 8 != 0 or width % 8 != 0:
-        return False, (
-            f"Height ({height}) and width ({width}) must be " "divisible by 8"
-        )
-
-    if with_frame_conditioning:
-        if (height * width) % 8192 != 0:
-            return False, (
-                f"Height * width ({height * width}) must be "
-                "divisible by 8192 for frame conditioning"
-            )
-    else:
-        if num_frames_in and not num_frames_in % 32 == 0:
-            return False, ("The input video total frame count must be divisible by 32!")
-
-
-def validate_input_image(
-    image: torch.Tensor, with_frame_conditioning: bool = False
-) -> None:
-    """
-    Validates the input image adheres to the expectations of the API:
-    - The image resolution should not be less than 300*300px
-    - The aspect ratio of the image should be between 1:2.5 ~ 2.5:1
-
-    """
-    height, width = get_image_dimensions(image)
-    validate_input_media(width, height, with_frame_conditioning)
-    validate_image_dimensions(
-        image, min_width=300, min_height=300, max_height=MAX_HEIGHT, max_width=MAX_WIDTH
-    )
-
-
 def validate_video_to_video_input(video: VideoInput) -> VideoInput:
     """
     Validates and processes video input for Moonvalley Video-to-Video generation.
@@ -499,7 +454,7 @@ class MoonvalleyImg2VideoNode(comfy_io.ComfyNode):
         seed: int,
         steps: int,
     ) -> comfy_io.NodeOutput:
-        validate_input_image(image, True)
+        validate_image_dimensions(image, min_width=300, min_height=300, max_height=MAX_HEIGHT, max_width=MAX_WIDTH)
         validate_prompts(prompt, negative_prompt, MOONVALLEY_MAREY_MAX_PROMPT_LENGTH)
         width_height = parse_width_height_from_res(resolution)

File diff suppressed because it is too large
@@ -360,7 +360,7 @@ class RecordAudio:
     def load(self, audio):
         audio_path = folder_paths.get_annotated_filepath(audio)

-        waveform, sample_rate = torchaudio.load(audio_path)
+        waveform, sample_rate = load(audio_path)
         audio = {"waveform": waveform.unsqueeze(0), "sample_rate": sample_rate}
         return (audio, )

@@ -1,6 +1,8 @@
 import torch
 import nodes
 import comfy.utils
+from typing_extensions import override
+from comfy_api.latest import ComfyExtension, io

 def camera_embeddings(elevation, azimuth):
     elevation = torch.as_tensor([elevation])
@@ -20,26 +22,31 @@ def camera_embeddings(elevation, azimuth):
     return embeddings


-class StableZero123_Conditioning:
+class StableZero123_Conditioning(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": { "clip_vision": ("CLIP_VISION",),
-                              "init_image": ("IMAGE",),
-                              "vae": ("VAE",),
-                              "width": ("INT", {"default": 256, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 8}),
-                              "height": ("INT", {"default": 256, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 8}),
-                              "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
-                              "elevation": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0, "step": 0.1, "round": False}),
-                              "azimuth": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0, "step": 0.1, "round": False}),
-                             }}
-    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
-    RETURN_NAMES = ("positive", "negative", "latent")
-
-    FUNCTION = "encode"
-
-    CATEGORY = "conditioning/3d_models"
-
-    def encode(self, clip_vision, init_image, vae, width, height, batch_size, elevation, azimuth):
+    def define_schema(cls):
+        return io.Schema(
+            node_id="StableZero123_Conditioning",
+            category="conditioning/3d_models",
+            inputs=[
+                io.ClipVision.Input("clip_vision"),
+                io.Image.Input("init_image"),
+                io.Vae.Input("vae"),
+                io.Int.Input("width", default=256, min=16, max=nodes.MAX_RESOLUTION, step=8),
+                io.Int.Input("height", default=256, min=16, max=nodes.MAX_RESOLUTION, step=8),
+                io.Int.Input("batch_size", default=1, min=1, max=4096),
+                io.Float.Input("elevation", default=0.0, min=-180.0, max=180.0, step=0.1, round=False),
+                io.Float.Input("azimuth", default=0.0, min=-180.0, max=180.0, step=0.1, round=False)
+            ],
+            outputs=[
+                io.Conditioning.Output(display_name="positive"),
+                io.Conditioning.Output(display_name="negative"),
+                io.Latent.Output(display_name="latent")
+            ]
+        )
+
+    @classmethod
+    def execute(cls, clip_vision, init_image, vae, width, height, batch_size, elevation, azimuth) -> io.NodeOutput:
         output = clip_vision.encode_image(init_image)
         pooled = output.image_embeds.unsqueeze(0)
         pixels = comfy.utils.common_upscale(init_image.movedim(-1,1), width, height, "bilinear", "center").movedim(1,-1)
@@ -51,30 +58,35 @@ class StableZero123_Conditioning:
         positive = [[cond, {"concat_latent_image": t}]]
         negative = [[torch.zeros_like(pooled), {"concat_latent_image": torch.zeros_like(t)}]]
         latent = torch.zeros([batch_size, 4, height // 8, width // 8])
-        return (positive, negative, {"samples":latent})
+        return io.NodeOutput(positive, negative, {"samples":latent})

-class StableZero123_Conditioning_Batched:
+class StableZero123_Conditioning_Batched(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": { "clip_vision": ("CLIP_VISION",),
-                              "init_image": ("IMAGE",),
-                              "vae": ("VAE",),
-                              "width": ("INT", {"default": 256, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 8}),
-                              "height": ("INT", {"default": 256, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 8}),
-                              "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
-                              "elevation": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0, "step": 0.1, "round": False}),
-                              "azimuth": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0, "step": 0.1, "round": False}),
-                              "elevation_batch_increment": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0, "step": 0.1, "round": False}),
-                              "azimuth_batch_increment": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0, "step": 0.1, "round": False}),
-                             }}
-    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
-    RETURN_NAMES = ("positive", "negative", "latent")
-
-    FUNCTION = "encode"
-
-    CATEGORY = "conditioning/3d_models"
-
-    def encode(self, clip_vision, init_image, vae, width, height, batch_size, elevation, azimuth, elevation_batch_increment, azimuth_batch_increment):
+    def define_schema(cls):
+        return io.Schema(
+            node_id="StableZero123_Conditioning_Batched",
+            category="conditioning/3d_models",
+            inputs=[
+                io.ClipVision.Input("clip_vision"),
+                io.Image.Input("init_image"),
+                io.Vae.Input("vae"),
+                io.Int.Input("width", default=256, min=16, max=nodes.MAX_RESOLUTION, step=8),
+                io.Int.Input("height", default=256, min=16, max=nodes.MAX_RESOLUTION, step=8),
+                io.Int.Input("batch_size", default=1, min=1, max=4096),
+                io.Float.Input("elevation", default=0.0, min=-180.0, max=180.0, step=0.1, round=False),
+                io.Float.Input("azimuth", default=0.0, min=-180.0, max=180.0, step=0.1, round=False),
+                io.Float.Input("elevation_batch_increment", default=0.0, min=-180.0, max=180.0, step=0.1, round=False),
+                io.Float.Input("azimuth_batch_increment", default=0.0, min=-180.0, max=180.0, step=0.1, round=False)
+            ],
+            outputs=[
+                io.Conditioning.Output(display_name="positive"),
+                io.Conditioning.Output(display_name="negative"),
+                io.Latent.Output(display_name="latent")
+            ]
+        )
+
+    @classmethod
+    def execute(cls, clip_vision, init_image, vae, width, height, batch_size, elevation, azimuth, elevation_batch_increment, azimuth_batch_increment) -> io.NodeOutput:
         output = clip_vision.encode_image(init_image)
         pooled = output.image_embeds.unsqueeze(0)
         pixels = comfy.utils.common_upscale(init_image.movedim(-1,1), width, height, "bilinear", "center").movedim(1,-1)
@@ -93,27 +105,32 @@ class StableZero123_Conditioning_Batched:
         positive = [[cond, {"concat_latent_image": t}]]
         negative = [[torch.zeros_like(pooled), {"concat_latent_image": torch.zeros_like(t)}]]
         latent = torch.zeros([batch_size, 4, height // 8, width // 8])
-        return (positive, negative, {"samples":latent, "batch_index": [0] * batch_size})
+        return io.NodeOutput(positive, negative, {"samples":latent, "batch_index": [0] * batch_size})

-class SV3D_Conditioning:
+class SV3D_Conditioning(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": { "clip_vision": ("CLIP_VISION",),
-                              "init_image": ("IMAGE",),
-                              "vae": ("VAE",),
-                              "width": ("INT", {"default": 576, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 8}),
-                              "height": ("INT", {"default": 576, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 8}),
-                              "video_frames": ("INT", {"default": 21, "min": 1, "max": 4096}),
-                              "elevation": ("FLOAT", {"default": 0.0, "min": -90.0, "max": 90.0, "step": 0.1, "round": False}),
-                             }}
-    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
-    RETURN_NAMES = ("positive", "negative", "latent")
-
-    FUNCTION = "encode"
-
-    CATEGORY = "conditioning/3d_models"
-
-    def encode(self, clip_vision, init_image, vae, width, height, video_frames, elevation):
+    def define_schema(cls):
+        return io.Schema(
+            node_id="SV3D_Conditioning",
+            category="conditioning/3d_models",
+            inputs=[
+                io.ClipVision.Input("clip_vision"),
+                io.Image.Input("init_image"),
+                io.Vae.Input("vae"),
+                io.Int.Input("width", default=576, min=16, max=nodes.MAX_RESOLUTION, step=8),
+                io.Int.Input("height", default=576, min=16, max=nodes.MAX_RESOLUTION, step=8),
+                io.Int.Input("video_frames", default=21, min=1, max=4096),
+                io.Float.Input("elevation", default=0.0, min=-90.0, max=90.0, step=0.1, round=False)
+            ],
+            outputs=[
+                io.Conditioning.Output(display_name="positive"),
+                io.Conditioning.Output(display_name="negative"),
+                io.Latent.Output(display_name="latent")
+            ]
+        )
+
+    @classmethod
+    def execute(cls, clip_vision, init_image, vae, width, height, video_frames, elevation) -> io.NodeOutput:
         output = clip_vision.encode_image(init_image)
         pooled = output.image_embeds.unsqueeze(0)
         pixels = comfy.utils.common_upscale(init_image.movedim(-1,1), width, height, "bilinear", "center").movedim(1,-1)
@@ -133,11 +150,17 @@ class SV3D_Conditioning:
         positive = [[pooled, {"concat_latent_image": t, "elevation": elevations, "azimuth": azimuths}]]
         negative = [[torch.zeros_like(pooled), {"concat_latent_image": torch.zeros_like(t), "elevation": elevations, "azimuth": azimuths}]]
         latent = torch.zeros([video_frames, 4, height // 8, width // 8])
-        return (positive, negative, {"samples":latent})
+        return io.NodeOutput(positive, negative, {"samples":latent})


-NODE_CLASS_MAPPINGS = {
-    "StableZero123_Conditioning": StableZero123_Conditioning,
-    "StableZero123_Conditioning_Batched": StableZero123_Conditioning_Batched,
-    "SV3D_Conditioning": SV3D_Conditioning,
-}
+class Stable3DExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            StableZero123_Conditioning,
+            StableZero123_Conditioning_Batched,
+            SV3D_Conditioning,
+        ]
+
+async def comfy_entrypoint() -> Stable3DExtension:
+    return Stable3DExtension()
@@ -70,7 +70,5 @@ messages_control.disable = [
     "invalid-overridden-method",
     "unused-variable",
     "pointless-string-statement",
-    "inconsistent-return-statements",
-    "import-outside-toplevel",
     "redefined-outer-name",
 ]
@@ -25,6 +25,5 @@ av>=14.2.0
 #non essential dependencies:
 kornia>=0.7.1
 spandrel
-soundfile
 pydantic~=2.0
 pydantic-settings~=2.0