Mirror of https://github.com/comfyanonymous/ComfyUI.git

Merge branch 'master' into trim_audio
commit cd944fcfdf
@@ -1164,12 +1164,18 @@ def tiled_scale_multidim(samples, function, tile=(64, 64), overlap=8, upscale_am
 o = out
 o_d = out_div
+ps_view = ps
+mask_view = mask
 for d in range(dims):
-    o = o.narrow(d + 2, upscaled[d], mask.shape[d + 2])
-    o_d = o_d.narrow(d + 2, upscaled[d], mask.shape[d + 2])
+    l = min(ps_view.shape[d + 2], o.shape[d + 2] - upscaled[d])
+    o = o.narrow(d + 2, upscaled[d], l)
+    o_d = o_d.narrow(d + 2, upscaled[d], l)
+    if l < ps_view.shape[d + 2]:
+        ps_view = ps_view.narrow(d + 2, 0, l)
+        mask_view = mask_view.narrow(d + 2, 0, l)

-o.add_(ps * mask)
-o_d.add_(mask)
+o.add_(ps_view * mask_view)
+o_d.add_(mask_view)

 if pbar is not None:
     pbar.update(1)

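For context, a minimal standalone sketch of why this change clamps the tile views (not part of the commit; tensor sizes and the offset are invented): when a tile overhangs the output border, the tile and its blending mask must be trimmed to the same length that actually fits, otherwise the in-place add_ shapes no longer match.

# Hedged illustration only; mirrors the clamping logic on a tiny 1-D example.
import torch

out = torch.zeros(1, 1, 10)       # accumulator, 10 samples along the last dim
out_div = torch.zeros_like(out)
ps = torch.ones(1, 1, 4)          # upscaled tile of width 4
mask = torch.ones_like(ps)
offset = 8                        # tile starts 2 samples from the right edge

d = 0
o, o_d = out, out_div
ps_view, mask_view = ps, mask

# Clamp the writable length to what fits inside the output tensor (here: 2).
l = min(ps_view.shape[d + 2], o.shape[d + 2] - offset)
o = o.narrow(d + 2, offset, l)
o_d = o_d.narrow(d + 2, offset, l)
if l < ps_view.shape[d + 2]:
    # Trim the tile and its mask to the same length before accumulating.
    ps_view = ps_view.narrow(d + 2, 0, l)
    mask_view = mask_view.narrow(d + 2, 0, l)

o.add_(ps_view * mask_view)
o_d.add_(mask_view)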
comfy_api_nodes/apis/anthropic.py (new file, 75 lines)
@@ -0,0 +1,75 @@
from enum import Enum
from typing import Literal

from pydantic import BaseModel, Field


class AnthropicRole(str, Enum):
    user = "user"
    assistant = "assistant"


class AnthropicTextContent(BaseModel):
    type: Literal["text"] = "text"
    text: str = Field(...)


class AnthropicImageSourceBase64(BaseModel):
    type: Literal["base64"] = "base64"
    media_type: str = Field(..., description="MIME type of the image, e.g. image/png, image/jpeg")
    data: str = Field(..., description="Base64-encoded image data")


class AnthropicImageSourceUrl(BaseModel):
    type: Literal["url"] = "url"
    url: str = Field(...)


class AnthropicImageContent(BaseModel):
    type: Literal["image"] = "image"
    source: AnthropicImageSourceBase64 | AnthropicImageSourceUrl = Field(...)


class AnthropicMessage(BaseModel):
    role: AnthropicRole = Field(...)
    content: list[AnthropicTextContent | AnthropicImageContent] = Field(...)


class AnthropicMessagesRequest(BaseModel):
    model: str = Field(...)
    messages: list[AnthropicMessage] = Field(...)
    max_tokens: int = Field(..., ge=1)
    system: str | None = Field(None, description="Top-level system prompt")
    temperature: float | None = Field(None, ge=0.0, le=1.0)
    top_p: float | None = Field(None, ge=0.0, le=1.0)
    top_k: int | None = Field(None, ge=0)
    stop_sequences: list[str] | None = Field(None)


class AnthropicResponseTextBlock(BaseModel):
    type: Literal["text"] = "text"
    text: str = Field(...)


class AnthropicCacheCreationUsage(BaseModel):
    ephemeral_5m_input_tokens: int | None = Field(None)
    ephemeral_1h_input_tokens: int | None = Field(None)


class AnthropicMessagesUsage(BaseModel):
    input_tokens: int | None = Field(None)
    output_tokens: int | None = Field(None)
    cache_creation_input_tokens: int | None = Field(None)
    cache_read_input_tokens: int | None = Field(None)
    cache_creation: AnthropicCacheCreationUsage | None = Field(None)


class AnthropicMessagesResponse(BaseModel):
    id: str | None = Field(None)
    type: str | None = Field(None)
    role: str | None = Field(None)
    model: str | None = Field(None)
    content: list[AnthropicResponseTextBlock] | None = Field(None)
    stop_reason: str | None = Field(None)
    stop_sequence: str | None = Field(None)
    usage: AnthropicMessagesUsage | None = Field(None)
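As a rough usage sketch (not part of the commit; field values are placeholders, and Pydantic v2 is assumed for model_dump), the request models above compose like ordinary Pydantic models:

# Hedged example: build a minimal Messages request payload from the models above.
from comfy_api_nodes.apis.anthropic import (
    AnthropicMessage,
    AnthropicMessagesRequest,
    AnthropicRole,
    AnthropicTextContent,
)

request = AnthropicMessagesRequest(
    model="claude-sonnet-4-5-20250929",
    max_tokens=1024,
    system="You are a concise assistant.",
    messages=[
        AnthropicMessage(
            role=AnthropicRole.user,
            content=[AnthropicTextContent(text="Describe this workflow in one sentence.")],
        )
    ],
)
# exclude_none keeps optional fields (top_p, top_k, stop_sequences, ...) out of the JSON body.
payload = request.model_dump(exclude_none=True)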
comfy_api_nodes/nodes_anthropic.py (new file, 245 lines)
@@ -0,0 +1,245 @@
"""API Nodes for Anthropic Claude (Messages API). See: https://docs.anthropic.com/en/api/messages"""

from typing_extensions import override

from comfy_api.latest import IO, ComfyExtension, Input
from comfy_api_nodes.apis.anthropic import (
    AnthropicImageContent,
    AnthropicImageSourceUrl,
    AnthropicMessage,
    AnthropicMessagesRequest,
    AnthropicMessagesResponse,
    AnthropicRole,
    AnthropicTextContent,
)
from comfy_api_nodes.util import (
    ApiEndpoint,
    get_number_of_images,
    sync_op,
    upload_images_to_comfyapi,
    validate_string,
)

ANTHROPIC_MESSAGES_ENDPOINT = "/proxy/anthropic/v1/messages"
ANTHROPIC_IMAGE_MAX_PIXELS = 1568 * 1568
CLAUDE_MAX_IMAGES = 20

CLAUDE_MODELS: dict[str, str] = {
    "Opus 4.7": "claude-opus-4-7",
    "Opus 4.6": "claude-opus-4-6",
    "Sonnet 4.6": "claude-sonnet-4-6",
    "Sonnet 4.5": "claude-sonnet-4-5-20250929",
    "Haiku 4.5": "claude-haiku-4-5-20251001",
}


def _claude_model_inputs():
    return [
        IO.Int.Input(
            "max_tokens",
            default=16000,
            min=32,
            max=32000,
            tooltip="Maximum number of tokens to generate before stopping.",
            advanced=True,
        ),
        IO.Float.Input(
            "temperature",
            default=1.0,
            min=0.0,
            max=1.0,
            step=0.01,
            tooltip="Controls randomness. 0.0 is deterministic, 1.0 is most random.",
            advanced=True,
        ),
    ]


def _model_price_per_million(model: str) -> tuple[float, float] | None:
    """Return (input_per_1M, output_per_1M) USD for a Claude model, or None if unknown."""
    if "opus-4-7" in model or "opus-4-6" in model or "opus-4-5" in model:
        return 5.0, 25.0
    if "sonnet-4" in model:
        return 3.0, 15.0
    if "haiku-4-5" in model:
        return 1.0, 5.0
    return None


def calculate_tokens_price(response: AnthropicMessagesResponse) -> float | None:
    """Compute approximate USD price from response usage. Server-side billing is authoritative."""
    if not response.usage or not response.model:
        return None
    rates = _model_price_per_million(response.model)
    if rates is None:
        return None
    input_rate, output_rate = rates
    input_tokens = response.usage.input_tokens or 0
    output_tokens = response.usage.output_tokens or 0
    cache_read = response.usage.cache_read_input_tokens or 0
    cache_5m = 0
    cache_1h = 0
    if response.usage.cache_creation:
        cache_5m = response.usage.cache_creation.ephemeral_5m_input_tokens or 0
        cache_1h = response.usage.cache_creation.ephemeral_1h_input_tokens or 0
    total = (
        input_tokens * input_rate
        + output_tokens * output_rate
        + cache_read * input_rate * 0.1
        + cache_5m * input_rate * 1.25
        + cache_1h * input_rate * 2.0
    )
    return total / 1_000_000.0


def _get_text_from_response(response: AnthropicMessagesResponse) -> str:
    if not response.content:
        return ""
    return "\n".join(block.text for block in response.content if block.text)


async def _build_image_content_blocks(
    cls: type[IO.ComfyNode],
    image_tensors: list[Input.Image],
) -> list[AnthropicImageContent]:
    urls = await upload_images_to_comfyapi(
        cls,
        image_tensors,
        max_images=CLAUDE_MAX_IMAGES,
        total_pixels=ANTHROPIC_IMAGE_MAX_PIXELS,
        wait_label="Uploading reference images",
    )
    return [AnthropicImageContent(source=AnthropicImageSourceUrl(url=url)) for url in urls]


class ClaudeNode(IO.ComfyNode):
    """Generate text responses from an Anthropic Claude model."""

    @classmethod
    def define_schema(cls):
        return IO.Schema(
            node_id="ClaudeNode",
            display_name="Anthropic Claude",
            category="api node/text/Anthropic",
            essentials_category="Text Generation",
            description="Generate text responses with Anthropic's Claude models. "
            "Provide a text prompt and optionally one or more images for multimodal context.",
            inputs=[
                IO.String.Input(
                    "prompt",
                    multiline=True,
                    default="",
                    tooltip="Text input to the model.",
                ),
                IO.DynamicCombo.Input(
                    "model",
                    options=[IO.DynamicCombo.Option(label, _claude_model_inputs()) for label in CLAUDE_MODELS],
                    tooltip="The Claude model used to generate the response.",
                ),
                IO.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=2147483647,
                    control_after_generate=True,
                    tooltip="Seed controls whether the node should re-run; "
                    "results are non-deterministic regardless of seed.",
                ),
                IO.Autogrow.Input(
                    "images",
                    template=IO.Autogrow.TemplateNames(
                        IO.Image.Input("image"),
                        names=[f"image_{i}" for i in range(1, CLAUDE_MAX_IMAGES + 1)],
                        min=0,
                    ),
                    tooltip=f"Optional image(s) to use as context for the model. Up to {CLAUDE_MAX_IMAGES} images.",
                ),
                IO.String.Input(
                    "system_prompt",
                    multiline=True,
                    default="",
                    optional=True,
                    advanced=True,
                    tooltip="Foundational instructions that dictate the model's behavior.",
                ),
            ],
            outputs=[IO.String.Output()],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
            price_badge=IO.PriceBadge(
                depends_on=IO.PriceBadgeDepends(widgets=["model"]),
                expr="""
                (
                    $m := widgets.model;
                    $contains($m, "opus") ? {
                        "type": "list_usd",
                        "usd": [0.005, 0.025],
                        "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" }
                    }
                    : $contains($m, "sonnet") ? {
                        "type": "list_usd",
                        "usd": [0.003, 0.015],
                        "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" }
                    }
                    : $contains($m, "haiku") ? {
                        "type": "list_usd",
                        "usd": [0.001, 0.005],
                        "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" }
                    }
                    : {"type":"text", "text":"Token-based"}
                )
                """,
            ),
        )

    @classmethod
    async def execute(
        cls,
        prompt: str,
        model: dict,
        seed: int,
        images: dict | None = None,
        system_prompt: str = "",
    ) -> IO.NodeOutput:
        validate_string(prompt, strip_whitespace=True, min_length=1)
        model_label = model["model"]
        max_tokens = model["max_tokens"]
        temperature = model["temperature"]

        image_tensors: list[Input.Image] = [t for t in (images or {}).values() if t is not None]
        if sum(get_number_of_images(t) for t in image_tensors) > CLAUDE_MAX_IMAGES:
            raise ValueError(f"Up to {CLAUDE_MAX_IMAGES} images are supported per request.")

        content: list[AnthropicTextContent | AnthropicImageContent] = []
        if image_tensors:
            content.extend(await _build_image_content_blocks(cls, image_tensors))
        content.append(AnthropicTextContent(text=prompt))

        response = await sync_op(
            cls,
            ApiEndpoint(path=ANTHROPIC_MESSAGES_ENDPOINT, method="POST"),
            response_model=AnthropicMessagesResponse,
            data=AnthropicMessagesRequest(
                model=CLAUDE_MODELS[model_label],
                max_tokens=max_tokens,
                messages=[AnthropicMessage(role=AnthropicRole.user, content=content)],
                system=system_prompt or None,
                temperature=temperature,
            ),
            price_extractor=calculate_tokens_price,
        )
        return IO.NodeOutput(_get_text_from_response(response) or "Empty response from Claude model.")


class AnthropicExtension(ComfyExtension):
    @override
    async def get_node_list(self) -> list[type[IO.ComfyNode]]:
        return [ClaudeNode]


async def comfy_entrypoint() -> AnthropicExtension:
    return AnthropicExtension()
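To make the pricing arithmetic in calculate_tokens_price concrete, here is a hedged worked example; the token counts are invented, the rates come from _model_price_per_million above, and server-side billing remains authoritative:

# Illustrative only: a Sonnet response with 1,000 input, 500 output, and 2,000 cache-read tokens.
from comfy_api_nodes.apis.anthropic import AnthropicMessagesResponse, AnthropicMessagesUsage
from comfy_api_nodes.nodes_anthropic import calculate_tokens_price

response = AnthropicMessagesResponse(
    model="claude-sonnet-4-5-20250929",
    usage=AnthropicMessagesUsage(
        input_tokens=1_000,
        output_tokens=500,
        cache_read_input_tokens=2_000,
    ),
)
# Sonnet: $3/1M input, $15/1M output, cache reads billed at 10% of the input rate:
# (1000 * 3 + 500 * 15 + 2000 * 3 * 0.1) / 1_000_000 = 0.0111 USD
print(calculate_tokens_price(response))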
@@ -1,3 +1,3 @@
 # This file is automatically generated by the build process when version is
 # updated in pyproject.toml.
-__version__ = "0.21.0"
+__version__ = "0.21.1"
@@ -1,6 +1,6 @@
 [project]
 name = "ComfyUI"
-version = "0.21.0"
+version = "0.21.1"
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.10"
@@ -1,5 +1,5 @@
 comfyui-frontend-package==1.43.18
-comfyui-workflow-templates==0.9.73
+comfyui-workflow-templates==0.9.75
 comfyui-embedded-docs==0.5.0
 torch
 torchsde
@@ -1,9 +1,23 @@
+from collections import defaultdict
+
 import torch

 from comfy.model_detection import detect_unet_config, model_config_from_unet_config
+import comfy.supported_models


+def _freeze(value):
+    """Recursively convert a value to a hashable form so configs can be
+    compared/used as dict keys or set members."""
+    if isinstance(value, dict):
+        return frozenset((k, _freeze(v)) for k, v in value.items())
+    if isinstance(value, (list, tuple)):
+        return tuple(_freeze(v) for v in value)
+    if isinstance(value, set):
+        return frozenset(_freeze(v) for v in value)
+    return value
+
+
 def _make_longcat_comfyui_sd():
     """Minimal ComfyUI-format state dict for pre-converted LongCat-Image weights."""
     sd = {}
@@ -110,3 +124,21 @@ class TestModelDetection:
         model_config = model_config_from_unet_config(unet_config, sd)
         assert model_config is not None
         assert type(model_config).__name__ == "FluxSchnell"
+
+    def test_unet_config_and_required_keys_combination_is_unique(self):
+        """Each model in the registry must have a unique combination of
+        ``unet_config`` and ``required_keys``. If two models share the same
+        combination, ``BASE.matches`` cannot disambiguate between them and the
+        first one in the list will always win."""
+        models = comfy.supported_models.models
+        groups = defaultdict(list)
+        for model in models:
+            key = (_freeze(model.unet_config), _freeze(model.required_keys))
+            groups[key].append(model.__name__)
+
+        duplicates = {k: names for k, names in groups.items() if len(names) > 1}
+        assert not duplicates, (
+            "Found models sharing the same (unet_config, required_keys) "
+            "combination, which makes detection ambiguous: "
+            + "; ".join(", ".join(names) for names in duplicates.values())
+        )
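A quick hedged illustration of what _freeze buys the new uniqueness test (the config dicts below are invented, not real entries from comfy.supported_models, and the snippet assumes the _freeze helper above is in scope): structurally equal nested configs collapse to the same hashable key regardless of dict ordering, so they can be grouped in a defaultdict.

# Invented configs for illustration; real keys come from model.unet_config / model.required_keys.
a = {"image_model": "example", "depth": [2, 2], "extra": {"guidance": True}}
b = {"extra": {"guidance": True}, "depth": [2, 2], "image_model": "example"}

assert _freeze(a) == _freeze(b)              # order-insensitive structural equality
assert hash(_freeze(a)) == hash(_freeze(b))  # and therefore usable as a dict/set key
groups = {_freeze(a): ["ExampleModelA", "ExampleModelB"]}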