Mirror of https://github.com/comfyanonymous/ComfyUI.git
Synced 2025-12-20 19:42:59 +08:00

Commit 2fe58571e2: Merge branch 'master' into dr-support-pip-cm

.gitattributes (vendored, 1 changed line)

@@ -1,2 +1,3 @@
 /web/assets/** linguist-generated
 /web/** linguist-vendored
+comfy_api_nodes/apis/__init__.py linguist-generated
README.md

@@ -66,6 +66,7 @@ See what ComfyUI can do with the [example workflows](https://comfyanonymous.github.io/ComfyUI_examples/).
 - [Lumina Image 2.0](https://comfyanonymous.github.io/ComfyUI_examples/lumina2/)
 - [HiDream](https://comfyanonymous.github.io/ComfyUI_examples/hidream/)
 - [Cosmos Predict2](https://comfyanonymous.github.io/ComfyUI_examples/cosmos_predict2/)
+- [Qwen Image](https://comfyanonymous.github.io/ComfyUI_examples/qwen_image/)
 - Image Editing Models
    - [Omnigen 2](https://comfyanonymous.github.io/ComfyUI_examples/omnigen/)
    - [Flux Kontext](https://comfyanonymous.github.io/ComfyUI_examples/flux/#flux-kontext-image-editing-model)
comfy/ldm/qwen_image/model.py

@@ -8,7 +8,7 @@ from einops import repeat
 from comfy.ldm.lightricks.model import TimestepEmbedding, Timesteps
 from comfy.ldm.modules.attention import optimized_attention_masked
 from comfy.ldm.flux.layers import EmbedND
-
+import comfy.ldm.common_dit

 class GELU(nn.Module):
     def __init__(self, dim_in: int, dim_out: int, approximate: str = "none", bias: bool = True, dtype=None, device=None, operations=None):
@@ -364,8 +364,9 @@ class QwenImageTransformer2DModel(nn.Module):

         image_rotary_emb = self.pos_embeds(x, context)

-        orig_shape = x.shape
-        hidden_states = x.view(orig_shape[0], orig_shape[1], orig_shape[-2] // 2, 2, orig_shape[-1] // 2, 2)
+        hidden_states = comfy.ldm.common_dit.pad_to_patch_size(x, (1, self.patch_size, self.patch_size))
+        orig_shape = hidden_states.shape
+        hidden_states = hidden_states.view(orig_shape[0], orig_shape[1], orig_shape[-2] // 2, 2, orig_shape[-1] // 2, 2)
         hidden_states = hidden_states.permute(0, 2, 4, 1, 3, 5)
         hidden_states = hidden_states.reshape(orig_shape[0], (orig_shape[-2] // 2) * (orig_shape[-1] // 2), orig_shape[1] * 4)

@@ -396,4 +397,4 @@ class QwenImageTransformer2DModel(nn.Module):

         hidden_states = hidden_states.view(orig_shape[0], orig_shape[-2] // 2, orig_shape[-1] // 2, orig_shape[1], 2, 2)
         hidden_states = hidden_states.permute(0, 3, 1, 4, 2, 5)
-        return hidden_states.reshape(orig_shape)
+        return hidden_states.reshape(orig_shape)[:, :, :, :x.shape[-2], :x.shape[-1]]
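Note on the change above: the forward pass previously assumed the latent height and width were already even; it now pads up to the patch size before the 2x2 patchify and crops that padding back off after unpatchify, so odd-sized latents survive the round trip. A minimal standalone sketch of the idea (the zero-padding helper below merely stands in for comfy.ldm.common_dit.pad_to_patch_size, whose padding mode may differ):

import torch
import torch.nn.functional as F

def pad_to_patch_size(x, patch_size=(1, 2, 2)):
    # Pad the trailing spatial dims up to multiples of the patch size
    # (zero padding here; the real helper's mode may differ).
    pad_h = (patch_size[1] - x.shape[-2] % patch_size[1]) % patch_size[1]
    pad_w = (patch_size[2] - x.shape[-1] % patch_size[2]) % patch_size[2]
    return F.pad(x, (0, pad_w, 0, pad_h))

x = torch.randn(1, 16, 37, 45)           # odd H/W, not divisible by 2
h = pad_to_patch_size(x)                  # -> (1, 16, 38, 46)
orig_shape = h.shape

# Patchify: (B, C, H, W) -> (B, H/2 * W/2, C * 4), as in the forward pass.
h = h.view(orig_shape[0], orig_shape[1], orig_shape[-2] // 2, 2, orig_shape[-1] // 2, 2)
h = h.permute(0, 2, 4, 1, 3, 5)
tokens = h.reshape(orig_shape[0], (orig_shape[-2] // 2) * (orig_shape[-1] // 2), orig_shape[1] * 4)

# Unpatchify and crop the padding off, mirroring the new return statement.
h = tokens.view(orig_shape[0], orig_shape[-2] // 2, orig_shape[-1] // 2, orig_shape[1], 2, 2)
h = h.permute(0, 3, 1, 4, 2, 5)
out = h.reshape(orig_shape)[:, :, :, :x.shape[-2], :x.shape[-1]]
assert out.shape == x.shape and torch.equal(out, x)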
comfy/lora.py

@@ -293,6 +293,15 @@ def model_lora_keys_unet(model, key_map={}):
                key_lora = k[len("diffusion_model."):-len(".weight")]
                key_map["{}".format(key_lora)] = k

+    if isinstance(model, comfy.model_base.QwenImage):
+        for k in sdk:
+            if k.startswith("diffusion_model.") and k.endswith(".weight"): #QwenImage lora format
+                key_lora = k[len("diffusion_model."):-len(".weight")]
+                # Direct mapping for transformer_blocks format (QwenImage LoRA format)
+                key_map["{}".format(key_lora)] = k
+                # Support transformer prefix format
+                key_map["transformer.{}".format(key_lora)] = k
+
     return key_map

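The new branch registers every QwenImage weight under two LoRA key spellings, the bare transformer_blocks form and the diffusers-style transformer. prefix, so LoRAs saved in either convention resolve to the same checkpoint key. A toy illustration (the key names are made up):

# Toy illustration of the mapping built above (key names are made up).
sdk = [
    "diffusion_model.transformer_blocks.0.attn.to_q.weight",
    "diffusion_model.transformer_blocks.0.attn.to_k.weight",
]

key_map = {}
for k in sdk:
    if k.startswith("diffusion_model.") and k.endswith(".weight"):
        key_lora = k[len("diffusion_model."):-len(".weight")]
        key_map[key_lora] = k                            # bare LoRA key
        key_map["transformer.{}".format(key_lora)] = k   # diffusers-style prefix

# Both spellings of the LoRA key point at the same model weight:
assert (key_map["transformer_blocks.0.attn.to_q"]
        == key_map["transformer.transformer_blocks.0.attn.to_q"])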
comfy/supported_models.py

@@ -1237,7 +1237,7 @@ class QwenImage(supported_models_base.BASE):

     sampling_settings = {
         "multiplier": 1.0,
-        "shift": 2.6,
+        "shift": 1.15,
     }

     memory_usage_factor = 1.8 #TODO
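Lowering shift from 2.6 to 1.15 changes how sampling timesteps are remapped for this flow model: smaller shifts keep the schedule closer to linear instead of concentrating steps at high noise. A sketch, assuming the usual discrete-flow time shift used elsewhere in ComfyUI:

def time_snr_shift(shift, t):
    # Discrete-flow time shift (sketch of the remapping the setting feeds).
    return shift * t / (1 + (shift - 1) * t)

t = 0.5  # mid-schedule timestep in [0, 1]
print(time_snr_shift(2.6, t))   # ~0.722: old shift pushes sigmas higher
print(time_snr_shift(1.15, t))  # ~0.535: new shift stays closer to linear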
comfy/weight_adapter/lora.py

@@ -96,6 +96,7 @@ class LoRAAdapter(WeightAdapterBase):
         diffusers3_lora = "{}.lora.up.weight".format(x)
         mochi_lora = "{}.lora_B".format(x)
         transformers_lora = "{}.lora_linear_layer.up.weight".format(x)
+        qwen_default_lora = "{}.lora_B.default.weight".format(x)
         A_name = None

         if regular_lora in lora.keys():
@@ -122,6 +123,10 @@ class LoRAAdapter(WeightAdapterBase):
             A_name = transformers_lora
             B_name = "{}.lora_linear_layer.down.weight".format(x)
             mid_name = None
+        elif qwen_default_lora in lora.keys():
+            A_name = qwen_default_lora
+            B_name = "{}.lora_A.default.weight".format(x)
+            mid_name = None

         if A_name is not None:
             mid = None
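The added elif recognizes PEFT-style checkpoints where the low-rank matrices are stored as lora_A/lora_B under an adapter named "default". Note the file's existing convention: A_name holds the up weight (here lora_B) and B_name the down weight (lora_A), matching the other branches. A standalone sketch of the detection (key prefix and tensor placeholders are made up):

# Detecting the PEFT "default" adapter naming (sketch; keys are made up).
lora = {
    "transformer_blocks.0.attn.to_q.lora_B.default.weight": "B-matrix tensor",
    "transformer_blocks.0.attn.to_q.lora_A.default.weight": "A-matrix tensor",
}

x = "transformer_blocks.0.attn.to_q"
qwen_default_lora = "{}.lora_B.default.weight".format(x)

if qwen_default_lora in lora:
    # A_name is the "up" (lora_B) weight, B_name the "down" (lora_A) weight.
    A_name = qwen_default_lora
    B_name = "{}.lora_A.default.weight".format(x)
    print(A_name, "->", lora[A_name])
    print(B_name, "->", lora[B_name])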
comfy_api_nodes/apis/__init__.py (generated, 2656 changed lines)

File diff suppressed because it is too large.

comfy_api_nodes/apis/tripo_api.py
@@ -127,7 +127,7 @@ class TripoTextToModelRequest(BaseModel):
     type: TripoTaskType = Field(TripoTaskType.TEXT_TO_MODEL, description='Type of task')
     prompt: str = Field(..., description='The text prompt describing the model to generate', max_length=1024)
     negative_prompt: Optional[str] = Field(None, description='The negative text prompt', max_length=1024)
-    model_version: Optional[TripoModelVersion] = TripoModelVersion.V2_5
+    model_version: Optional[TripoModelVersion] = TripoModelVersion.v2_5_20250123
     face_limit: Optional[int] = Field(None, description='The number of faces to limit the generation to')
     texture: Optional[bool] = Field(True, description='Whether to apply texture to the generated model')
     pbr: Optional[bool] = Field(True, description='Whether to apply PBR to the generated model')
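The default model_version now points at a dated enum member from the regenerated API schema. A minimal pydantic sketch of the pattern (the enum's value string is an assumption for illustration):

from enum import Enum
from typing import Optional
from pydantic import BaseModel

class TripoModelVersion(str, Enum):
    # The value string here is an assumption for illustration.
    v2_5_20250123 = "v2.5-20250123"

class TripoTextToModelRequest(BaseModel):
    # Silence pydantic v2's warning about the "model_" field prefix.
    model_config = {"protected_namespaces": ()}
    prompt: str
    model_version: Optional[TripoModelVersion] = TripoModelVersion.v2_5_20250123

req = TripoTextToModelRequest(prompt="a ceramic teapot")
print(req.model_version.value)  # "v2.5-20250123"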
comfy_api_nodes/nodes_veo2.py

@@ -8,10 +8,10 @@ from typing import Optional
 from comfy.comfy_types.node_typing import IO, ComfyNodeABC
 from comfy_api.input_impl.video_types import VideoFromFile
 from comfy_api_nodes.apis import (
-    Veo2GenVidRequest,
-    Veo2GenVidResponse,
-    Veo2GenVidPollRequest,
-    Veo2GenVidPollResponse
+    VeoGenVidRequest,
+    VeoGenVidResponse,
+    VeoGenVidPollRequest,
+    VeoGenVidPollResponse
 )
 from comfy_api_nodes.apis.client import (
     ApiEndpoint,
@@ -35,7 +35,7 @@ def convert_image_to_base64(image: torch.Tensor):
     return tensor_to_base64_string(scaled_image)


-def get_video_url_from_response(poll_response: Veo2GenVidPollResponse) -> Optional[str]:
+def get_video_url_from_response(poll_response: VeoGenVidPollResponse) -> Optional[str]:
     if (
         poll_response.response
         and hasattr(poll_response.response, "videos")
@@ -130,6 +130,14 @@ class VeoVideoGenerationNode(ComfyNodeABC):
"default": None,
|
"default": None,
|
||||||
"tooltip": "Optional reference image to guide video generation",
|
"tooltip": "Optional reference image to guide video generation",
|
||||||
}),
|
}),
|
||||||
|
"model": (
|
||||||
|
IO.COMBO,
|
||||||
|
{
|
||||||
|
"options": ["veo-2.0-generate-001"],
|
||||||
|
"default": "veo-2.0-generate-001",
|
||||||
|
"tooltip": "Veo 2 model to use for video generation",
|
||||||
|
},
|
||||||
|
),
|
||||||
},
|
},
|
||||||
"hidden": {
|
"hidden": {
|
||||||
"auth_token": "AUTH_TOKEN_COMFY_ORG",
|
"auth_token": "AUTH_TOKEN_COMFY_ORG",
|
||||||
@@ -141,7 +149,7 @@ class VeoVideoGenerationNode(ComfyNodeABC):
     RETURN_TYPES = (IO.VIDEO,)
     FUNCTION = "generate_video"
     CATEGORY = "api node/video/Veo"
-    DESCRIPTION = "Generates videos from text prompts using Google's Veo API"
+    DESCRIPTION = "Generates videos from text prompts using Google's Veo 2 API"
     API_NODE = True

     def generate_video(
@@ -154,6 +162,8 @@ class VeoVideoGenerationNode(ComfyNodeABC):
         person_generation="ALLOW",
         seed=0,
         image=None,
+        model="veo-2.0-generate-001",
+        generate_audio=False,
         unique_id: Optional[str] = None,
         **kwargs,
     ):
@@ -188,16 +198,19 @@ class VeoVideoGenerationNode(ComfyNodeABC):
parameters["negativePrompt"] = negative_prompt
|
parameters["negativePrompt"] = negative_prompt
|
||||||
if seed > 0:
|
if seed > 0:
|
||||||
parameters["seed"] = seed
|
parameters["seed"] = seed
|
||||||
|
# Only add generateAudio for Veo 3 models
|
||||||
|
if "veo-3.0" in model:
|
||||||
|
parameters["generateAudio"] = generate_audio
|
||||||
|
|
||||||
# Initial request to start video generation
|
# Initial request to start video generation
|
||||||
initial_operation = SynchronousOperation(
|
initial_operation = SynchronousOperation(
|
||||||
endpoint=ApiEndpoint(
|
endpoint=ApiEndpoint(
|
||||||
path="/proxy/veo/generate",
|
path=f"/proxy/veo/{model}/generate",
|
||||||
method=HttpMethod.POST,
|
method=HttpMethod.POST,
|
||||||
request_model=Veo2GenVidRequest,
|
request_model=VeoGenVidRequest,
|
||||||
response_model=Veo2GenVidResponse
|
response_model=VeoGenVidResponse
|
||||||
),
|
),
|
||||||
request=Veo2GenVidRequest(
|
request=VeoGenVidRequest(
|
||||||
instances=instances,
|
instances=instances,
|
||||||
parameters=parameters
|
parameters=parameters
|
||||||
),
|
),
|
||||||
@@ -223,16 +236,16 @@ class VeoVideoGenerationNode(ComfyNodeABC):
         # Define the polling operation
         poll_operation = PollingOperation(
             poll_endpoint=ApiEndpoint(
-                path="/proxy/veo/poll",
+                path=f"/proxy/veo/{model}/poll",
                 method=HttpMethod.POST,
-                request_model=Veo2GenVidPollRequest,
-                response_model=Veo2GenVidPollResponse
+                request_model=VeoGenVidPollRequest,
+                response_model=VeoGenVidPollResponse
             ),
             completed_statuses=["completed"],
             failed_statuses=[], # No failed statuses, we'll handle errors after polling
             status_extractor=status_extractor,
             progress_extractor=progress_extractor,
-            request=Veo2GenVidPollRequest(
+            request=VeoGenVidPollRequest(
                 operationName=operation_name
             ),
             auth_kwargs=kwargs,
@@ -298,11 +311,64 @@ class VeoVideoGenerationNode(ComfyNodeABC):
         return (VideoFromFile(video_io),)


-# Register the node
+class Veo3VideoGenerationNode(VeoVideoGenerationNode):
+    """
+    Generates videos from text prompts using Google's Veo 3 API.
+
+    Supported models:
+    - veo-3.0-generate-001
+    - veo-3.0-fast-generate-001
+
+    This node extends the base Veo node with Veo 3 specific features including
+    audio generation and fixed 8-second duration.
+    """
+
+    @classmethod
+    def INPUT_TYPES(s):
+        parent_input = super().INPUT_TYPES()
+
+        # Update model options for Veo 3
+        parent_input["optional"]["model"] = (
+            IO.COMBO,
+            {
+                "options": ["veo-3.0-generate-001", "veo-3.0-fast-generate-001"],
+                "default": "veo-3.0-generate-001",
+                "tooltip": "Veo 3 model to use for video generation",
+            },
+        )
+
+        # Add generateAudio parameter
+        parent_input["optional"]["generate_audio"] = (
+            IO.BOOLEAN,
+            {
+                "default": False,
+                "tooltip": "Generate audio for the video. Supported by all Veo 3 models.",
+            }
+        )
+
+        # Update duration constraints for Veo 3 (only 8 seconds supported)
+        parent_input["optional"]["duration_seconds"] = (
+            IO.INT,
+            {
+                "default": 8,
+                "min": 8,
+                "max": 8,
+                "step": 1,
+                "display": "number",
+                "tooltip": "Duration of the output video in seconds (Veo 3 only supports 8 seconds)",
+            },
+        )
+
+        return parent_input
+
+
+# Register the nodes
 NODE_CLASS_MAPPINGS = {
     "VeoVideoGenerationNode": VeoVideoGenerationNode,
+    "Veo3VideoGenerationNode": Veo3VideoGenerationNode,
 }

 NODE_DISPLAY_NAME_MAPPINGS = {
     "VeoVideoGenerationNode": "Google Veo 2 Video Generation",
+    "Veo3VideoGenerationNode": "Google Veo 3 Video Generation",
 }
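Since the endpoint path now embeds the model name and generateAudio is gated on a veo-3.0 prefix, the single generate_video implementation serves both node classes; Veo3VideoGenerationNode only overrides the input options. A toy sketch of that routing (the helper functions are made up, the paths and guard mirror the diff):

# Toy sketch: the model choice selects the proxy endpoints (helpers made up).
def veo_endpoints(model: str) -> tuple:
    return (f"/proxy/veo/{model}/generate", f"/proxy/veo/{model}/poll")

def build_parameters(model: str, generate_audio: bool = False) -> dict:
    parameters = {}
    # Only Veo 3 models accept generateAudio, matching the diff's guard.
    if "veo-3.0" in model:
        parameters["generateAudio"] = generate_audio
    return parameters

print(veo_endpoints("veo-2.0-generate-001"))
# ('/proxy/veo/veo-2.0-generate-001/generate', '/proxy/veo/veo-2.0-generate-001/poll')
print(build_parameters("veo-3.0-generate-001", generate_audio=True))   # {'generateAudio': True}
print(build_parameters("veo-2.0-generate-001", generate_audio=True))   # {} (ignored for Veo 2)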
comfy_extras/nodes_model_merging_model_specific.py

@@ -314,6 +314,29 @@ class ModelMergeCosmosPredict2_14B(comfy_extras.nodes_model_merging.ModelMergeBlocks):

         return {"required": arg_dict}

+class ModelMergeQwenImage(comfy_extras.nodes_model_merging.ModelMergeBlocks):
+    CATEGORY = "advanced/model_merging/model_specific"
+
+    @classmethod
+    def INPUT_TYPES(s):
+        arg_dict = { "model1": ("MODEL",),
+                     "model2": ("MODEL",)}
+
+        argument = ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01})
+
+        arg_dict["pos_embeds."] = argument
+        arg_dict["img_in."] = argument
+        arg_dict["txt_norm."] = argument
+        arg_dict["txt_in."] = argument
+        arg_dict["time_text_embed."] = argument
+
+        for i in range(60):
+            arg_dict["transformer_blocks.{}.".format(i)] = argument
+
+        arg_dict["proj_out."] = argument
+
+        return {"required": arg_dict}
+
 NODE_CLASS_MAPPINGS = {
     "ModelMergeSD1": ModelMergeSD1,
     "ModelMergeSD2": ModelMergeSD1, #SD1 and SD2 have the same blocks
@@ -329,4 +352,5 @@ NODE_CLASS_MAPPINGS = {
"ModelMergeWAN2_1": ModelMergeWAN2_1,
|
"ModelMergeWAN2_1": ModelMergeWAN2_1,
|
||||||
"ModelMergeCosmosPredict2_2B": ModelMergeCosmosPredict2_2B,
|
"ModelMergeCosmosPredict2_2B": ModelMergeCosmosPredict2_2B,
|
||||||
"ModelMergeCosmosPredict2_14B": ModelMergeCosmosPredict2_14B,
|
"ModelMergeCosmosPredict2_14B": ModelMergeCosmosPredict2_14B,
|
||||||
|
"ModelMergeQwenImage": ModelMergeQwenImage,
|
||||||
}
|
}
|
||||||
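Each string key in arg_dict is a weight-name prefix paired with a blend ratio, which ModelMergeBlocks applies to every weight whose name matches. A standalone sketch of prefix-based merging in that spirit (the helper and tensors are illustrative, not ComfyUI's actual merge code):

import torch

def merge_by_prefix(sd1, sd2, ratios, default=1.0):
    # Longest matching prefix wins; ratio is the weight given to model2.
    out = {}
    for name, w1 in sd1.items():
        ratio, best = default, -1
        for prefix, r in ratios.items():
            if name.startswith(prefix) and len(prefix) > best:
                best, ratio = len(prefix), r
        out[name] = w1 * (1.0 - ratio) + sd2[name] * ratio
    return out

sd1 = {"transformer_blocks.0.attn.to_q.weight": torch.zeros(2, 2)}
sd2 = {"transformer_blocks.0.attn.to_q.weight": torch.ones(2, 2)}
merged = merge_by_prefix(sd1, sd2, {"transformer_blocks.0.": 0.25})
print(merged["transformer_blocks.0.attn.to_q.weight"][0, 0].item())  # 0.25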
comfyui_version.py

@@ -1,3 +1,3 @@
 # This file is automatically generated by the build process when version is
 # updated in pyproject.toml.
-__version__ = "0.3.48"
+__version__ = "0.3.49"
pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "ComfyUI"
-version = "0.3.48"
+version = "0.3.49"
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.9"
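comfyui_version.py is generated from pyproject.toml, so the two version strings must be bumped together, as in this diff. A quick consistency check one could run from the repo root (a sketch, not part of the build):

# Sketch: assert pyproject.toml and comfyui_version.py agree on the version.
import re

pyproject_version = re.search(
    r'^version\s*=\s*"([^"]+)"', open("pyproject.toml").read(), re.M
).group(1)
module_version = re.search(
    r'__version__\s*=\s*"([^"]+)"', open("comfyui_version.py").read()
).group(1)
assert pyproject_version == module_version, (pyproject_version, module_version)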
requirements.txt

@@ -1,5 +1,5 @@
 comfyui-frontend-package==1.23.4
-comfyui-workflow-templates==0.1.47
+comfyui-workflow-templates==0.1.52
 comfyui-embedded-docs==0.2.4
 comfyui_manager
 torch