Merge branch 'master' into dr-support-pip-cm

Author: Dr.Lt.Data
Date:   2025-08-07 07:45:14 +09:00
Commit: 2fe58571e2

13 changed files with 2712 additions and 101 deletions

.gitattributes

@@ -1,2 +1,3 @@
 /web/assets/** linguist-generated
 /web/** linguist-vendored
+comfy_api_nodes/apis/__init__.py linguist-generated


@@ -66,6 +66,7 @@ See what ComfyUI can do with the [example workflows](https://comfyanonymous.gith
 - [Lumina Image 2.0](https://comfyanonymous.github.io/ComfyUI_examples/lumina2/)
 - [HiDream](https://comfyanonymous.github.io/ComfyUI_examples/hidream/)
 - [Cosmos Predict2](https://comfyanonymous.github.io/ComfyUI_examples/cosmos_predict2/)
+- [Qwen Image](https://comfyanonymous.github.io/ComfyUI_examples/qwen_image/)
 - Image Editing Models
 - [Omnigen 2](https://comfyanonymous.github.io/ComfyUI_examples/omnigen/)
 - [Flux Kontext](https://comfyanonymous.github.io/ComfyUI_examples/flux/#flux-kontext-image-editing-model)


@@ -8,7 +8,7 @@ from einops import repeat
 from comfy.ldm.lightricks.model import TimestepEmbedding, Timesteps
 from comfy.ldm.modules.attention import optimized_attention_masked
 from comfy.ldm.flux.layers import EmbedND
+import comfy.ldm.common_dit
 
 class GELU(nn.Module):
     def __init__(self, dim_in: int, dim_out: int, approximate: str = "none", bias: bool = True, dtype=None, device=None, operations=None):
@@ -364,8 +364,9 @@ class QwenImageTransformer2DModel(nn.Module):
         image_rotary_emb = self.pos_embeds(x, context)
 
-        orig_shape = x.shape
-        hidden_states = x.view(orig_shape[0], orig_shape[1], orig_shape[-2] // 2, 2, orig_shape[-1] // 2, 2)
+        hidden_states = comfy.ldm.common_dit.pad_to_patch_size(x, (1, self.patch_size, self.patch_size))
+        orig_shape = hidden_states.shape
+        hidden_states = hidden_states.view(orig_shape[0], orig_shape[1], orig_shape[-2] // 2, 2, orig_shape[-1] // 2, 2)
         hidden_states = hidden_states.permute(0, 2, 4, 1, 3, 5)
         hidden_states = hidden_states.reshape(orig_shape[0], (orig_shape[-2] // 2) * (orig_shape[-1] // 2), orig_shape[1] * 4)
@@ -396,4 +397,4 @@ class QwenImageTransformer2DModel(nn.Module):
         hidden_states = hidden_states.view(orig_shape[0], orig_shape[-2] // 2, orig_shape[-1] // 2, orig_shape[1], 2, 2)
         hidden_states = hidden_states.permute(0, 3, 1, 4, 2, 5)
-        return hidden_states.reshape(orig_shape)
+        return hidden_states.reshape(orig_shape)[:, :, :, :x.shape[-2], :x.shape[-1]]
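
The change above lets the Qwen image transformer accept latents whose height or width is not a multiple of the patch size: the input is padded up before patchifying and the extra rows and columns are cropped off after unpatchifying. A minimal standalone sketch of that round trip (the helpers below are illustrative stand-ins, not the actual comfy.ldm.common_dit implementation):

import torch
import torch.nn.functional as F

def pad_to_patch_size(img, patch_size=(1, 2, 2)):
    # Right-pad the last len(patch_size) dims up to the next multiple of each patch dim.
    pad = []
    for dim, p in zip(img.shape[-len(patch_size):], patch_size):
        pad = [0, (p - dim % p) % p] + pad  # F.pad expects pads for the last dim first
    return F.pad(img, pad)

def patchify(x):
    # (B, C, H, W) -> (B, (H/2)*(W/2), C*4), mirroring the view/permute/reshape in the diff
    b, c, h, w = x.shape
    hs = x.view(b, c, h // 2, 2, w // 2, 2)
    hs = hs.permute(0, 2, 4, 1, 3, 5)
    return hs.reshape(b, (h // 2) * (w // 2), c * 4)

def unpatchify_and_crop(tokens, padded_shape, orig_h, orig_w):
    # Invert patchify over the padded shape, then crop back to the original H x W.
    b, c, h, w = padded_shape
    hs = tokens.view(b, h // 2, w // 2, c, 2, 2)
    hs = hs.permute(0, 3, 1, 4, 2, 5)
    return hs.reshape(b, c, h, w)[:, :, :orig_h, :orig_w]

x = torch.randn(1, 16, 37, 51)                 # odd spatial size on purpose
padded = pad_to_patch_size(x)                  # -> (1, 16, 38, 52)
tokens = patchify(padded)
out = unpatchify_and_crop(tokens, padded.shape, x.shape[-2], x.shape[-1])
assert out.shape == x.shape and torch.equal(out, x)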


@@ -293,6 +293,15 @@ def model_lora_keys_unet(model, key_map={}):
                 key_lora = k[len("diffusion_model."):-len(".weight")]
                 key_map["{}".format(key_lora)] = k
 
+    if isinstance(model, comfy.model_base.QwenImage):
+        for k in sdk:
+            if k.startswith("diffusion_model.") and k.endswith(".weight"): #QwenImage lora format
+                key_lora = k[len("diffusion_model."):-len(".weight")]
+                # Direct mapping for transformer_blocks format (QwenImage LoRA format)
+                key_map["{}".format(key_lora)] = k
+                # Support transformer prefix format
+                key_map["transformer.{}".format(key_lora)] = k
+
     return key_map
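
In effect, every QwenImage diffusion weight becomes addressable from LoRA files that name layers either bare ("transformer_blocks....") or with a "transformer." prefix. A small illustrative run of the new branch over a single hypothetical state-dict key:

sdk = ["diffusion_model.transformer_blocks.0.attn.to_q.weight"]  # hypothetical key, for illustration

key_map = {}
for k in sdk:
    if k.startswith("diffusion_model.") and k.endswith(".weight"):
        key_lora = k[len("diffusion_model."):-len(".weight")]
        key_map[key_lora] = k                            # QwenImage LoRA format
        key_map["transformer.{}".format(key_lora)] = k   # diffusers-style "transformer." prefix

print(key_map)
# {'transformer_blocks.0.attn.to_q': 'diffusion_model.transformer_blocks.0.attn.to_q.weight',
#  'transformer.transformer_blocks.0.attn.to_q': 'diffusion_model.transformer_blocks.0.attn.to_q.weight'}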


@@ -1237,7 +1237,7 @@ class QwenImage(supported_models_base.BASE):
     sampling_settings = {
         "multiplier": 1.0,
-        "shift": 2.6,
+        "shift": 1.15,
     }
 
     memory_usage_factor = 1.8 #TODO
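
For context on the shift change: in flow-based sampling a "shift" of this kind typically remaps each sigma as sigma' = shift * sigma / (1 + (shift - 1) * sigma), so dropping it from 2.6 to 1.15 keeps the schedule much closer to identity instead of pushing mid steps toward high noise. A quick sketch, assuming that SD3-style formula (shown for illustration, not copied from the ComfyUI source):

def shift_sigma(sigma: float, shift: float) -> float:
    # Assumed SD3-style discrete-flow shift, for illustration only.
    return shift * sigma / (1 + (shift - 1) * sigma)

for s in (0.25, 0.5, 0.75):
    print(s, round(shift_sigma(s, 2.6), 3), round(shift_sigma(s, 1.15), 3))
# 0.25 -> 0.464 vs 0.277, 0.5 -> 0.722 vs 0.535, 0.75 -> 0.886 vs 0.775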


@@ -96,6 +96,7 @@ class LoRAAdapter(WeightAdapterBase):
         diffusers3_lora = "{}.lora.up.weight".format(x)
         mochi_lora = "{}.lora_B".format(x)
         transformers_lora = "{}.lora_linear_layer.up.weight".format(x)
+        qwen_default_lora = "{}.lora_B.default.weight".format(x)
         A_name = None
 
         if regular_lora in lora.keys():
@@ -122,6 +123,10 @@ class LoRAAdapter(WeightAdapterBase):
             A_name = transformers_lora
             B_name = "{}.lora_linear_layer.down.weight".format(x)
             mid_name = None
+        elif qwen_default_lora in lora.keys():
+            A_name = qwen_default_lora
+            B_name = "{}.lora_A.default.weight".format(x)
+            mid_name = None
 
         if A_name is not None:
             mid = None
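
The new branch recognizes LoRA files saved with PEFT's default adapter naming, where each module stores "<name>.lora_A.default.weight" and "<name>.lora_B.default.weight". A small sketch of just that lookup (hypothetical state dict; as in the other branches of this adapter, A_name refers to the "up" weight):

def find_qwen_default_lora(lora: dict, x: str):
    # Mirrors the new elif branch: match "<x>.lora_B.default.weight" and pair it
    # with "<x>.lora_A.default.weight"; returns (A_name, B_name, mid_name).
    qwen_default_lora = "{}.lora_B.default.weight".format(x)
    if qwen_default_lora in lora:
        return qwen_default_lora, "{}.lora_A.default.weight".format(x), None
    return None, None, None

lora = {  # hypothetical PEFT-style LoRA state dict
    "transformer_blocks.0.attn.to_q.lora_A.default.weight": "down-projection tensor",
    "transformer_blocks.0.attn.to_q.lora_B.default.weight": "up-projection tensor",
}
print(find_qwen_default_lora(lora, "transformer_blocks.0.attn.to_q"))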

File diff suppressed because it is too large.


@@ -127,7 +127,7 @@ class TripoTextToModelRequest(BaseModel):
     type: TripoTaskType = Field(TripoTaskType.TEXT_TO_MODEL, description='Type of task')
     prompt: str = Field(..., description='The text prompt describing the model to generate', max_length=1024)
     negative_prompt: Optional[str] = Field(None, description='The negative text prompt', max_length=1024)
-    model_version: Optional[TripoModelVersion] = TripoModelVersion.V2_5
+    model_version: Optional[TripoModelVersion] = TripoModelVersion.v2_5_20250123
     face_limit: Optional[int] = Field(None, description='The number of faces to limit the generation to')
     texture: Optional[bool] = Field(True, description='Whether to apply texture to the generated model')
     pbr: Optional[bool] = Field(True, description='Whether to apply PBR to the generated model')
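
One behavioral note on the default change: because the annotation is Optional[...] with a plain enum default rather than Field(None, ...), omitting model_version yields the new enum member, not None. A minimal sketch, assuming Pydantic v2 and a hypothetical value string for the enum member:

from enum import Enum
from typing import Optional
from pydantic import BaseModel, ConfigDict

class TripoModelVersion(str, Enum):
    v2_5_20250123 = "v2.5-20250123"  # value string assumed for illustration

class TextToModelRequest(BaseModel):
    model_config = ConfigDict(protected_namespaces=())  # allow a "model_" field name
    model_version: Optional[TripoModelVersion] = TripoModelVersion.v2_5_20250123

print(TextToModelRequest().model_version)                     # TripoModelVersion.v2_5_20250123
print(TextToModelRequest(model_version=None).model_version)   # None can still be passed explicitly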


@@ -8,10 +8,10 @@ from typing import Optional
 from comfy.comfy_types.node_typing import IO, ComfyNodeABC
 from comfy_api.input_impl.video_types import VideoFromFile
 from comfy_api_nodes.apis import (
-    Veo2GenVidRequest,
-    Veo2GenVidResponse,
-    Veo2GenVidPollRequest,
-    Veo2GenVidPollResponse
+    VeoGenVidRequest,
+    VeoGenVidResponse,
+    VeoGenVidPollRequest,
+    VeoGenVidPollResponse
 )
 from comfy_api_nodes.apis.client import (
     ApiEndpoint,
@@ -35,7 +35,7 @@ def convert_image_to_base64(image: torch.Tensor):
     return tensor_to_base64_string(scaled_image)
 
-def get_video_url_from_response(poll_response: Veo2GenVidPollResponse) -> Optional[str]:
+def get_video_url_from_response(poll_response: VeoGenVidPollResponse) -> Optional[str]:
     if (
         poll_response.response
         and hasattr(poll_response.response, "videos")
@@ -130,6 +130,14 @@ class VeoVideoGenerationNode(ComfyNodeABC):
                     "default": None,
                     "tooltip": "Optional reference image to guide video generation",
                 }),
+                "model": (
+                    IO.COMBO,
+                    {
+                        "options": ["veo-2.0-generate-001"],
+                        "default": "veo-2.0-generate-001",
+                        "tooltip": "Veo 2 model to use for video generation",
+                    },
+                ),
             },
             "hidden": {
                 "auth_token": "AUTH_TOKEN_COMFY_ORG",
@@ -141,7 +149,7 @@ class VeoVideoGenerationNode(ComfyNodeABC):
     RETURN_TYPES = (IO.VIDEO,)
     FUNCTION = "generate_video"
     CATEGORY = "api node/video/Veo"
-    DESCRIPTION = "Generates videos from text prompts using Google's Veo API"
+    DESCRIPTION = "Generates videos from text prompts using Google's Veo 2 API"
     API_NODE = True
 
     def generate_video(
@@ -154,6 +162,8 @@ class VeoVideoGenerationNode(ComfyNodeABC):
         person_generation="ALLOW",
         seed=0,
         image=None,
+        model="veo-2.0-generate-001",
+        generate_audio=False,
         unique_id: Optional[str] = None,
         **kwargs,
     ):
@@ -188,16 +198,19 @@ class VeoVideoGenerationNode(ComfyNodeABC):
             parameters["negativePrompt"] = negative_prompt
         if seed > 0:
            parameters["seed"] = seed
+        # Only add generateAudio for Veo 3 models
+        if "veo-3.0" in model:
+            parameters["generateAudio"] = generate_audio
 
         # Initial request to start video generation
         initial_operation = SynchronousOperation(
             endpoint=ApiEndpoint(
-                path="/proxy/veo/generate",
+                path=f"/proxy/veo/{model}/generate",
                 method=HttpMethod.POST,
-                request_model=Veo2GenVidRequest,
-                response_model=Veo2GenVidResponse
+                request_model=VeoGenVidRequest,
+                response_model=VeoGenVidResponse
             ),
-            request=Veo2GenVidRequest(
+            request=VeoGenVidRequest(
                 instances=instances,
                 parameters=parameters
             ),
@@ -223,16 +236,16 @@ class VeoVideoGenerationNode(ComfyNodeABC):
         # Define the polling operation
         poll_operation = PollingOperation(
             poll_endpoint=ApiEndpoint(
-                path="/proxy/veo/poll",
+                path=f"/proxy/veo/{model}/poll",
                 method=HttpMethod.POST,
-                request_model=Veo2GenVidPollRequest,
-                response_model=Veo2GenVidPollResponse
+                request_model=VeoGenVidPollRequest,
+                response_model=VeoGenVidPollResponse
             ),
             completed_statuses=["completed"],
             failed_statuses=[], # No failed statuses, we'll handle errors after polling
             status_extractor=status_extractor,
             progress_extractor=progress_extractor,
-            request=Veo2GenVidPollRequest(
+            request=VeoGenVidPollRequest(
                 operationName=operation_name
             ),
             auth_kwargs=kwargs,
@@ -298,11 +311,64 @@ class VeoVideoGenerationNode(ComfyNodeABC):
         return (VideoFromFile(video_io),)
 
 
-# Register the node
+class Veo3VideoGenerationNode(VeoVideoGenerationNode):
+    """
+    Generates videos from text prompts using Google's Veo 3 API.
+
+    Supported models:
+    - veo-3.0-generate-001
+    - veo-3.0-fast-generate-001
+
+    This node extends the base Veo node with Veo 3 specific features including
+    audio generation and fixed 8-second duration.
+    """
+
+    @classmethod
+    def INPUT_TYPES(s):
+        parent_input = super().INPUT_TYPES()
+
+        # Update model options for Veo 3
+        parent_input["optional"]["model"] = (
+            IO.COMBO,
+            {
+                "options": ["veo-3.0-generate-001", "veo-3.0-fast-generate-001"],
+                "default": "veo-3.0-generate-001",
+                "tooltip": "Veo 3 model to use for video generation",
+            },
+        )
+
+        # Add generateAudio parameter
+        parent_input["optional"]["generate_audio"] = (
+            IO.BOOLEAN,
+            {
+                "default": False,
+                "tooltip": "Generate audio for the video. Supported by all Veo 3 models.",
+            }
+        )
+
+        # Update duration constraints for Veo 3 (only 8 seconds supported)
+        parent_input["optional"]["duration_seconds"] = (
+            IO.INT,
+            {
+                "default": 8,
+                "min": 8,
+                "max": 8,
+                "step": 1,
+                "display": "number",
+                "tooltip": "Duration of the output video in seconds (Veo 3 only supports 8 seconds)",
+            },
+        )
+
+        return parent_input
+
+
+# Register the nodes
 NODE_CLASS_MAPPINGS = {
     "VeoVideoGenerationNode": VeoVideoGenerationNode,
+    "Veo3VideoGenerationNode": Veo3VideoGenerationNode,
 }
 
 NODE_DISPLAY_NAME_MAPPINGS = {
-    "VeoVideoGenerationNode": "Google Veo2 Video Generation",
+    "VeoVideoGenerationNode": "Google Veo 2 Video Generation",
+    "Veo3VideoGenerationNode": "Google Veo 3 Video Generation",
 }
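
Taken together, the changes above let one request path serve both Veo 2 and Veo 3: the selected model id is embedded in the proxy endpoint paths and generateAudio is only sent for Veo 3 models, while the Veo 3 node simply narrows the model options and pins the duration to 8 seconds. A condensed sketch of the parameter and path assembly (simplified from the node code above, not a drop-in replacement):

def build_veo_request(model: str, prompt: str, negative_prompt: str = "",
                      seed: int = 0, generate_audio: bool = False):
    parameters = {}
    if negative_prompt:
        parameters["negativePrompt"] = negative_prompt
    if seed > 0:
        parameters["seed"] = seed
    # generateAudio is only meaningful for Veo 3, so it is gated on the model id.
    if "veo-3.0" in model:
        parameters["generateAudio"] = generate_audio
    # The model id is part of the proxy paths, so the same node logic can target
    # either the veo-2.0 or veo-3.0 endpoints.
    generate_path = f"/proxy/veo/{model}/generate"
    poll_path = f"/proxy/veo/{model}/poll"
    return generate_path, poll_path, parameters

print(build_veo_request("veo-2.0-generate-001", "a cat", seed=3))
print(build_veo_request("veo-3.0-generate-001", "a cat", generate_audio=True))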


@@ -314,6 +314,29 @@ class ModelMergeCosmosPredict2_14B(comfy_extras.nodes_model_merging.ModelMergeBl
         return {"required": arg_dict}
 
+class ModelMergeQwenImage(comfy_extras.nodes_model_merging.ModelMergeBlocks):
+    CATEGORY = "advanced/model_merging/model_specific"
+
+    @classmethod
+    def INPUT_TYPES(s):
+        arg_dict = { "model1": ("MODEL",),
+                     "model2": ("MODEL",)}
+
+        argument = ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01})
+
+        arg_dict["pos_embeds."] = argument
+        arg_dict["img_in."] = argument
+        arg_dict["txt_norm."] = argument
+        arg_dict["txt_in."] = argument
+        arg_dict["time_text_embed."] = argument
+
+        for i in range(60):
+            arg_dict["transformer_blocks.{}.".format(i)] = argument
+
+        arg_dict["proj_out."] = argument
+
+        return {"required": arg_dict}
+
 NODE_CLASS_MAPPINGS = {
     "ModelMergeSD1": ModelMergeSD1,
     "ModelMergeSD2": ModelMergeSD1, #SD1 and SD2 have the same blocks
@@ -329,4 +352,5 @@ NODE_CLASS_MAPPINGS = {
     "ModelMergeWAN2_1": ModelMergeWAN2_1,
     "ModelMergeCosmosPredict2_2B": ModelMergeCosmosPredict2_2B,
     "ModelMergeCosmosPredict2_14B": ModelMergeCosmosPredict2_14B,
+    "ModelMergeQwenImage": ModelMergeQwenImage,
 }
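
ModelMergeBlocks-style nodes use those prefixed argument names as per-block blend ratios. A rough sketch of how a weight key is matched to the longest declared prefix (illustrative only, not the ComfyUI merge implementation):

def pick_ratio(key: str, prefix_ratios: dict, default: float = 1.0) -> float:
    # Choose the ratio of the longest prefix that matches the weight key.
    best = None
    for prefix in prefix_ratios:
        if key.startswith(prefix) and (best is None or len(prefix) > len(best)):
            best = prefix
    return prefix_ratios[best] if best is not None else default

prefix_ratios = {"img_in.": 1.0, "transformer_blocks.0.": 0.25, "proj_out.": 0.0}
print(pick_ratio("transformer_blocks.0.attn.to_q.weight", prefix_ratios))  # 0.25
print(pick_ratio("proj_out.weight", prefix_ratios))                        # 0.0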


@@ -1,3 +1,3 @@
 # This file is automatically generated by the build process when version is
 # updated in pyproject.toml.
-__version__ = "0.3.48"
+__version__ = "0.3.49"


@@ -1,6 +1,6 @@
 [project]
 name = "ComfyUI"
-version = "0.3.48"
+version = "0.3.49"
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.9"


@@ -1,5 +1,5 @@
 comfyui-frontend-package==1.23.4
-comfyui-workflow-templates==0.1.47
+comfyui-workflow-templates==0.1.52
 comfyui-embedded-docs==0.2.4
 comfyui_manager
 torch