Compare commits

..

10 Commits

Author SHA1 Message Date
kijai
266207de4c Fix normal rendering 2026-05-14 13:41:19 +03:00
kijai
beba44772e Merge remote-tracking branch 'upstream/master' into moge 2026-05-14 12:56:42 +03:00
kijai
4cc929e8a1 Update some tooltips 2026-05-14 12:56:39 +03:00
kijai
7a5aff1946 Slice alpha channel 2026-05-14 12:52:16 +03:00
Jukka Seppänen
1f28908d6e
Make audio processing nodes handle None -inputs (#13879)
Some checks are pending
Build package / Build Test (3.10) (push) Waiting to run
Build package / Build Test (3.11) (push) Waiting to run
Build package / Build Test (3.12) (push) Waiting to run
Build package / Build Test (3.13) (push) Waiting to run
Build package / Build Test (3.14) (push) Waiting to run
Python Linting / Run Ruff (push) Waiting to run
Python Linting / Run Pylint (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Waiting to run
Execution Tests / test (windows-latest) (push) Waiting to run
Execution Tests / test (macos-latest) (push) Waiting to run
Execution Tests / test (ubuntu-latest) (push) Waiting to run
Test server launches without errors / test (push) Waiting to run
Unit Tests / test (macos-latest) (push) Waiting to run
Unit Tests / test (ubuntu-latest) (push) Waiting to run
Unit Tests / test (windows-2022) (push) Waiting to run
2026-05-14 10:51:35 +08:00
Talmaj
fb51a988b6
Add test that each model has unique identifiers CORE-134 (#13654) 2026-05-14 10:41:25 +08:00
comfyanonymous
26515acd23 ComfyUI v0.21.1 2026-05-13 16:25:01 -04:00
Talmaj
74c17a25e5
Fix void failing with RuntimeError: start (0) + length (464) exceeds dimension size (461). (#13873) 2026-05-13 12:37:30 -07:00
Daxiong (Lin)
afb4fa15d5
chore: update workflow templates to v0.9.75 (#13877)
Co-authored-by: Jedrzej Kosinski <kosinkadink1@gmail.com>
2026-05-13 12:33:12 -07:00
Alexander Piskun
b94941d8d3
[Partner Nodes] add Claude LLM node (#13867)
* [Partner Nodes] add Claude LLM node

Signed-off-by: bigcat88 <bigcat88@icloud.com>

* [Partner Nodes] add seed param

Signed-off-by: bigcat88 <bigcat88@icloud.com>

* [Partner Nodes] use image urls instead of base64

Signed-off-by: bigcat88 <bigcat88@icloud.com>

* [Partner Nodes] fixed pricing for the claude 4.7

Signed-off-by: bigcat88 <bigcat88@icloud.com>

---------

Signed-off-by: bigcat88 <bigcat88@icloud.com>
2026-05-13 12:24:58 -07:00
9 changed files with 431 additions and 36 deletions

View File

@ -1164,12 +1164,18 @@ def tiled_scale_multidim(samples, function, tile=(64, 64), overlap=8, upscale_am
o = out
o_d = out_div
ps_view = ps
mask_view = mask
for d in range(dims):
o = o.narrow(d + 2, upscaled[d], mask.shape[d + 2])
o_d = o_d.narrow(d + 2, upscaled[d], mask.shape[d + 2])
l = min(ps_view.shape[d + 2], o.shape[d + 2] - upscaled[d])
o = o.narrow(d + 2, upscaled[d], l)
o_d = o_d.narrow(d + 2, upscaled[d], l)
if l < ps_view.shape[d + 2]:
ps_view = ps_view.narrow(d + 2, 0, l)
mask_view = mask_view.narrow(d + 2, 0, l)
o.add_(ps * mask)
o_d.add_(mask)
o.add_(ps_view * mask_view)
o_d.add_(mask_view)
if pbar is not None:
pbar.update(1)

View File

@ -0,0 +1,75 @@
from enum import Enum
from typing import Literal
from pydantic import BaseModel, Field
class AnthropicRole(str, Enum):
user = "user"
assistant = "assistant"
class AnthropicTextContent(BaseModel):
type: Literal["text"] = "text"
text: str = Field(...)
class AnthropicImageSourceBase64(BaseModel):
type: Literal["base64"] = "base64"
media_type: str = Field(..., description="MIME type of the image, e.g. image/png, image/jpeg")
data: str = Field(..., description="Base64-encoded image data")
class AnthropicImageSourceUrl(BaseModel):
type: Literal["url"] = "url"
url: str = Field(...)
class AnthropicImageContent(BaseModel):
type: Literal["image"] = "image"
source: AnthropicImageSourceBase64 | AnthropicImageSourceUrl = Field(...)
class AnthropicMessage(BaseModel):
role: AnthropicRole = Field(...)
content: list[AnthropicTextContent | AnthropicImageContent] = Field(...)
class AnthropicMessagesRequest(BaseModel):
model: str = Field(...)
messages: list[AnthropicMessage] = Field(...)
max_tokens: int = Field(..., ge=1)
system: str | None = Field(None, description="Top-level system prompt")
temperature: float | None = Field(None, ge=0.0, le=1.0)
top_p: float | None = Field(None, ge=0.0, le=1.0)
top_k: int | None = Field(None, ge=0)
stop_sequences: list[str] | None = Field(None)
class AnthropicResponseTextBlock(BaseModel):
type: Literal["text"] = "text"
text: str = Field(...)
class AnthropicCacheCreationUsage(BaseModel):
ephemeral_5m_input_tokens: int | None = Field(None)
ephemeral_1h_input_tokens: int | None = Field(None)
class AnthropicMessagesUsage(BaseModel):
input_tokens: int | None = Field(None)
output_tokens: int | None = Field(None)
cache_creation_input_tokens: int | None = Field(None)
cache_read_input_tokens: int | None = Field(None)
cache_creation: AnthropicCacheCreationUsage | None = Field(None)
class AnthropicMessagesResponse(BaseModel):
id: str | None = Field(None)
type: str | None = Field(None)
role: str | None = Field(None)
model: str | None = Field(None)
content: list[AnthropicResponseTextBlock] | None = Field(None)
stop_reason: str | None = Field(None)
stop_sequence: str | None = Field(None)
usage: AnthropicMessagesUsage | None = Field(None)

View File

@ -0,0 +1,245 @@
"""API Nodes for Anthropic Claude (Messages API). See: https://docs.anthropic.com/en/api/messages"""
from typing_extensions import override
from comfy_api.latest import IO, ComfyExtension, Input
from comfy_api_nodes.apis.anthropic import (
AnthropicImageContent,
AnthropicImageSourceUrl,
AnthropicMessage,
AnthropicMessagesRequest,
AnthropicMessagesResponse,
AnthropicRole,
AnthropicTextContent,
)
from comfy_api_nodes.util import (
ApiEndpoint,
get_number_of_images,
sync_op,
upload_images_to_comfyapi,
validate_string,
)
ANTHROPIC_MESSAGES_ENDPOINT = "/proxy/anthropic/v1/messages"
ANTHROPIC_IMAGE_MAX_PIXELS = 1568 * 1568
CLAUDE_MAX_IMAGES = 20
CLAUDE_MODELS: dict[str, str] = {
"Opus 4.7": "claude-opus-4-7",
"Opus 4.6": "claude-opus-4-6",
"Sonnet 4.6": "claude-sonnet-4-6",
"Sonnet 4.5": "claude-sonnet-4-5-20250929",
"Haiku 4.5": "claude-haiku-4-5-20251001",
}
def _claude_model_inputs():
return [
IO.Int.Input(
"max_tokens",
default=16000,
min=32,
max=32000,
tooltip="Maximum number of tokens to generate before stopping.",
advanced=True,
),
IO.Float.Input(
"temperature",
default=1.0,
min=0.0,
max=1.0,
step=0.01,
tooltip="Controls randomness. 0.0 is deterministic, 1.0 is most random.",
advanced=True,
),
]
def _model_price_per_million(model: str) -> tuple[float, float] | None:
"""Return (input_per_1M, output_per_1M) USD for a Claude model, or None if unknown."""
if "opus-4-7" in model or "opus-4-6" in model or "opus-4-5" in model:
return 5.0, 25.0
if "sonnet-4" in model:
return 3.0, 15.0
if "haiku-4-5" in model:
return 1.0, 5.0
return None
def calculate_tokens_price(response: AnthropicMessagesResponse) -> float | None:
"""Compute approximate USD price from response usage. Server-side billing is authoritative."""
if not response.usage or not response.model:
return None
rates = _model_price_per_million(response.model)
if rates is None:
return None
input_rate, output_rate = rates
input_tokens = response.usage.input_tokens or 0
output_tokens = response.usage.output_tokens or 0
cache_read = response.usage.cache_read_input_tokens or 0
cache_5m = 0
cache_1h = 0
if response.usage.cache_creation:
cache_5m = response.usage.cache_creation.ephemeral_5m_input_tokens or 0
cache_1h = response.usage.cache_creation.ephemeral_1h_input_tokens or 0
total = (
input_tokens * input_rate
+ output_tokens * output_rate
+ cache_read * input_rate * 0.1
+ cache_5m * input_rate * 1.25
+ cache_1h * input_rate * 2.0
)
return total / 1_000_000.0
def _get_text_from_response(response: AnthropicMessagesResponse) -> str:
if not response.content:
return ""
return "\n".join(block.text for block in response.content if block.text)
async def _build_image_content_blocks(
cls: type[IO.ComfyNode],
image_tensors: list[Input.Image],
) -> list[AnthropicImageContent]:
urls = await upload_images_to_comfyapi(
cls,
image_tensors,
max_images=CLAUDE_MAX_IMAGES,
total_pixels=ANTHROPIC_IMAGE_MAX_PIXELS,
wait_label="Uploading reference images",
)
return [AnthropicImageContent(source=AnthropicImageSourceUrl(url=url)) for url in urls]
class ClaudeNode(IO.ComfyNode):
"""Generate text responses from an Anthropic Claude model."""
@classmethod
def define_schema(cls):
return IO.Schema(
node_id="ClaudeNode",
display_name="Anthropic Claude",
category="api node/text/Anthropic",
essentials_category="Text Generation",
description="Generate text responses with Anthropic's Claude models. "
"Provide a text prompt and optionally one or more images for multimodal context.",
inputs=[
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Text input to the model.",
),
IO.DynamicCombo.Input(
"model",
options=[IO.DynamicCombo.Option(label, _claude_model_inputs()) for label in CLAUDE_MODELS],
tooltip="The Claude model used to generate the response.",
),
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
control_after_generate=True,
tooltip="Seed controls whether the node should re-run; "
"results are non-deterministic regardless of seed.",
),
IO.Autogrow.Input(
"images",
template=IO.Autogrow.TemplateNames(
IO.Image.Input("image"),
names=[f"image_{i}" for i in range(1, CLAUDE_MAX_IMAGES + 1)],
min=0,
),
tooltip=f"Optional image(s) to use as context for the model. Up to {CLAUDE_MAX_IMAGES} images.",
),
IO.String.Input(
"system_prompt",
multiline=True,
default="",
optional=True,
advanced=True,
tooltip="Foundational instructions that dictate the model's behavior.",
),
],
outputs=[IO.String.Output()],
hidden=[
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
price_badge=IO.PriceBadge(
depends_on=IO.PriceBadgeDepends(widgets=["model"]),
expr="""
(
$m := widgets.model;
$contains($m, "opus") ? {
"type": "list_usd",
"usd": [0.005, 0.025],
"format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" }
}
: $contains($m, "sonnet") ? {
"type": "list_usd",
"usd": [0.003, 0.015],
"format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" }
}
: $contains($m, "haiku") ? {
"type": "list_usd",
"usd": [0.001, 0.005],
"format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" }
}
: {"type":"text", "text":"Token-based"}
)
""",
),
)
@classmethod
async def execute(
cls,
prompt: str,
model: dict,
seed: int,
images: dict | None = None,
system_prompt: str = "",
) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=True, min_length=1)
model_label = model["model"]
max_tokens = model["max_tokens"]
temperature = model["temperature"]
image_tensors: list[Input.Image] = [t for t in (images or {}).values() if t is not None]
if sum(get_number_of_images(t) for t in image_tensors) > CLAUDE_MAX_IMAGES:
raise ValueError(f"Up to {CLAUDE_MAX_IMAGES} images are supported per request.")
content: list[AnthropicTextContent | AnthropicImageContent] = []
if image_tensors:
content.extend(await _build_image_content_blocks(cls, image_tensors))
content.append(AnthropicTextContent(text=prompt))
response = await sync_op(
cls,
ApiEndpoint(path=ANTHROPIC_MESSAGES_ENDPOINT, method="POST"),
response_model=AnthropicMessagesResponse,
data=AnthropicMessagesRequest(
model=CLAUDE_MODELS[model_label],
max_tokens=max_tokens,
messages=[AnthropicMessage(role=AnthropicRole.user, content=content)],
system=system_prompt or None,
temperature=temperature,
),
price_extractor=calculate_tokens_price,
)
return IO.NodeOutput(_get_text_from_response(response) or "Empty response from Claude model.")
class AnthropicExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
return [ClaudeNode]
async def comfy_entrypoint() -> AnthropicExtension:
return AnthropicExtension()

View File

@ -82,6 +82,8 @@ class VAEEncodeAudio(IO.ComfyNode):
@classmethod
def execute(cls, vae, audio) -> IO.NodeOutput:
if audio is None:
raise ValueError("VAEEncodeAudio: input audio is None (source video may have no audio track).")
sample_rate = audio["sample_rate"]
vae_sample_rate = getattr(vae, "audio_sample_rate", 44100)
if vae_sample_rate != sample_rate:
@ -171,6 +173,8 @@ class SaveAudio(IO.ComfyNode):
@classmethod
def execute(cls, audio, filename_prefix="ComfyUI", format="flac") -> IO.NodeOutput:
if audio is None:
raise ValueError("SaveAudio: input audio is None (source video may have no audio track).")
return IO.NodeOutput(
ui=UI.AudioSaveHelper.get_save_audio_ui(audio, filename_prefix=filename_prefix, cls=cls, format=format)
)
@ -198,6 +202,8 @@ class SaveAudioMP3(IO.ComfyNode):
@classmethod
def execute(cls, audio, filename_prefix="ComfyUI", format="mp3", quality="128k") -> IO.NodeOutput:
if audio is None:
raise ValueError("SaveAudioMP3: input audio is None (source video may have no audio track).")
return IO.NodeOutput(
ui=UI.AudioSaveHelper.get_save_audio_ui(
audio, filename_prefix=filename_prefix, cls=cls, format=format, quality=quality
@ -226,6 +232,8 @@ class SaveAudioOpus(IO.ComfyNode):
@classmethod
def execute(cls, audio, filename_prefix="ComfyUI", format="opus", quality="V3") -> IO.NodeOutput:
if audio is None:
raise ValueError("SaveAudioOpus: input audio is None (source video may have no audio track).")
return IO.NodeOutput(
ui=UI.AudioSaveHelper.get_save_audio_ui(
audio, filename_prefix=filename_prefix, cls=cls, format=format, quality=quality
@ -252,6 +260,8 @@ class PreviewAudio(IO.ComfyNode):
@classmethod
def execute(cls, audio) -> IO.NodeOutput:
if audio is None:
raise ValueError("PreviewAudio: input audio is None (source video may have no audio track).")
return IO.NodeOutput(ui=UI.PreviewAudio(audio, cls=cls))
save_flac = execute # TODO: remove
@ -392,21 +402,26 @@ class TrimAudioDuration(IO.ComfyNode):
@classmethod
def execute(cls, audio, start_index, duration) -> IO.NodeOutput:
if audio is None:
return IO.NodeOutput(None)
waveform = audio["waveform"]
sample_rate = audio["sample_rate"]
audio_length = waveform.shape[-1]
if audio_length == 0:
return IO.NodeOutput(audio)
if start_index < 0:
start_frame = audio_length + int(round(start_index * sample_rate))
else:
start_frame = int(round(start_index * sample_rate))
start_frame = max(0, min(start_frame, audio_length - 1))
start_frame = max(0, min(start_frame, audio_length))
end_frame = start_frame + int(round(duration * sample_rate))
end_frame = max(0, min(end_frame, audio_length))
if start_frame >= end_frame:
raise ValueError("AudioTrim: Start time must be less than end time and be within the audio length.")
raise ValueError("TrimAudioDuration: Start time must be less than end time and be within the audio length.")
return IO.NodeOutput({"waveform": waveform[..., start_frame:end_frame], "sample_rate": sample_rate})
@ -433,11 +448,13 @@ class SplitAudioChannels(IO.ComfyNode):
@classmethod
def execute(cls, audio) -> IO.NodeOutput:
if audio is None:
return IO.NodeOutput(None, None)
waveform = audio["waveform"]
sample_rate = audio["sample_rate"]
if waveform.shape[1] != 2:
raise ValueError("AudioSplit: Input audio has only one channel.")
raise ValueError(f"AudioSplit: Input audio must be stereo (2 channels), got {waveform.shape[1]} channel(s).")
left_channel = waveform[..., 0:1, :]
right_channel = waveform[..., 1:2, :]
@ -465,6 +482,12 @@ class JoinAudioChannels(IO.ComfyNode):
@classmethod
def execute(cls, audio_left, audio_right) -> IO.NodeOutput:
if audio_left is None and audio_right is None:
return IO.NodeOutput(None)
if audio_left is None:
return IO.NodeOutput(audio_right)
if audio_right is None:
return IO.NodeOutput(audio_left)
waveform_left = audio_left["waveform"]
sample_rate_left = audio_left["sample_rate"]
waveform_right = audio_right["waveform"]
@ -538,6 +561,12 @@ class AudioConcat(IO.ComfyNode):
@classmethod
def execute(cls, audio1, audio2, direction) -> IO.NodeOutput:
if audio1 is None and audio2 is None:
return IO.NodeOutput(None)
if audio1 is None:
return IO.NodeOutput(audio2)
if audio2 is None:
return IO.NodeOutput(audio1)
waveform_1 = audio1["waveform"]
waveform_2 = audio2["waveform"]
sample_rate_1 = audio1["sample_rate"]
@ -585,6 +614,12 @@ class AudioMerge(IO.ComfyNode):
@classmethod
def execute(cls, audio1, audio2, merge_method) -> IO.NodeOutput:
if audio1 is None and audio2 is None:
return IO.NodeOutput(None)
if audio1 is None:
return IO.NodeOutput(audio2)
if audio2 is None:
return IO.NodeOutput(audio1)
waveform_1 = audio1["waveform"]
waveform_2 = audio2["waveform"]
sample_rate_1 = audio1["sample_rate"]
@ -595,6 +630,9 @@ class AudioMerge(IO.ComfyNode):
length_1 = waveform_1.shape[-1]
length_2 = waveform_2.shape[-1]
if length_1 == 0 or length_2 == 0:
return IO.NodeOutput({"waveform": waveform_1, "sample_rate": output_sample_rate})
if length_2 > length_1:
logging.info(f"AudioMerge: Trimming audio2 from {length_2} to {length_1} samples to match audio1 length.")
waveform_2 = waveform_2[..., :length_1]
@ -646,6 +684,8 @@ class AudioAdjustVolume(IO.ComfyNode):
@classmethod
def execute(cls, audio, volume) -> IO.NodeOutput:
if audio is None:
return IO.NodeOutput(None)
if volume == 0:
return IO.NodeOutput(audio)
waveform = audio["waveform"]
@ -729,8 +769,14 @@ class AudioEqualizer3Band(IO.ComfyNode):
@classmethod
def execute(cls, audio, low_gain_dB, low_freq, mid_gain_dB, mid_freq, mid_q, high_gain_dB, high_freq) -> IO.NodeOutput:
if audio is None:
return IO.NodeOutput(None)
waveform = audio["waveform"]
sample_rate = audio["sample_rate"]
if waveform.shape[-1] == 0:
return IO.NodeOutput(audio)
eq_waveform = waveform.clone()
# 1. Apply Low Shelf (Bass)

View File

@ -49,23 +49,8 @@ def _normals_from_points(points: torch.Tensor) -> torch.Tensor:
dy = pts[..., 2:, :, :] - pts[..., :-2, :, :]
dx = torch.nn.functional.pad(dx.permute(0, 3, 1, 2), (1, 1, 0, 0)).permute(0, 2, 3, 1)
dy = torch.nn.functional.pad(dy.permute(0, 3, 1, 2), (0, 0, 1, 1)).permute(0, 2, 3, 1)
n = torch.cross(dx, dy, dim=-1)
n = torch.nn.functional.normalize(n, dim=-1)
return torch.where(finite.unsqueeze(-1), n, torch.zeros_like(n))
def _screen_normals_from_depth(depth: torch.Tensor) -> torch.Tensor:
"""Screen-space surface normals (X right, Y down, Z into scene)."""
finite = torch.isfinite(depth) & (depth > 0)
d = torch.where(finite, depth, torch.zeros_like(depth))
H, W = d.shape[-2:]
d4d = d.unsqueeze(1)
# Scale gradients to normalized image coords so a 45 deg tilt lands as a 45 deg normal regardless of resolution.
dz_dx = (d4d[..., :, 2:] - d4d[..., :, :-2]) * (W / 2.0)
dz_dy = (d4d[..., 2:, :] - d4d[..., :-2, :]) * (H / 2.0)
dz_dx = torch.nn.functional.pad(dz_dx, (1, 1, 0, 0)).squeeze(1)
dz_dy = torch.nn.functional.pad(dz_dy, (0, 0, 1, 1)).squeeze(1)
n = torch.stack([-dz_dx, -dz_dy, torch.ones_like(d)], dim=-1)
# dy x dx (not dx x dy) so the result is outward-facing in OpenCV (Y-down flips the right-hand rule), matching v2's predicted normals.
n = torch.cross(dy, dx, dim=-1)
n = torch.nn.functional.normalize(n, dim=-1)
return torch.where(finite.unsqueeze(-1), n, torch.zeros_like(n))
@ -141,6 +126,7 @@ class MoGePanoramaInference(io.ComfyNode):
if image.shape[0] != 1:
raise ValueError(f"MoGePanoramaInference takes a single image (got batch of {image.shape[0]})")
image = image[..., :3]
H, W = int(image.shape[1]), int(image.shape[2])
scale = min(merge_resolution / max(H, W), 1.0)
merge_h, merge_w = max(int(H * scale), 32), max(int(W * scale), 32)
@ -244,12 +230,12 @@ class MoGeInference(io.ComfyNode):
io.Int.Input("resolution_level", default=9, min=0, max=9,
tooltip="0 = fastest, 9 = most detail."),
io.Float.Input("fov_x_degrees", default=0.0, min=0.0, max=170.0, step=0.1,
tooltip="Override horizontal FoV. 0 = auto."),
tooltip="Horizontal field of view of the source camera. Sets the focal length used to unproject the depth map into 3D. 0 = auto-recover from the predicted points."),
io.Int.Input("batch_size", default=4, min=1, max=64,
tooltip="Images per inference call. Lower if you OOM on a long video / image set."),
io.Boolean.Input("force_projection", default=True),
io.Boolean.Input("apply_mask", default=True,
tooltip="Set masked-out points/depth to inf."),
tooltip="Set masked-out (sky / invalid) pixels to inf in points and depth so meshing culls them. Disable to keep the raw predicted geometry everywhere; the mask is still returned separately."),
],
outputs=[MoGeGeometry.Output(display_name="geometry")],
)
@ -257,6 +243,7 @@ class MoGeInference(io.ComfyNode):
@classmethod
def execute(cls, moge_model, image, resolution_level, fov_x_degrees, batch_size, force_projection, apply_mask) -> io.NodeOutput:
image = image[..., :3]
bchw = image.movedim(-1, -3).contiguous()
B = bchw.shape[0]
fov = None if fov_x_degrees <= 0 else float(fov_x_degrees)
@ -294,19 +281,23 @@ class MoGeRender(io.ComfyNode):
category="image/geometry",
inputs=[
MoGeGeometry.Input("geometry"),
io.Combo.Input("output", options=["depth", "depth_colored", "normal", "normal_screen", "mask"], default="depth"),
io.Combo.Input("output", options=["depth", "depth_colored", "normal_opengl", "normal_directx", "mask"], default="depth",
tooltip="DirectX vs OpenGL controls the normal-map green-channel convention. DirectX: green = -Y down (Unreal). OpenGL: green = +Y up (Blender, Substance, Unity, glTF)."),
],
outputs=[io.Image.Output()],
)
@classmethod
def execute(cls, geometry, output) -> io.NodeOutput:
is_normal = output in ("normal_directx", "normal_opengl")
opengl = output.endswith("_opengl")
# Pick the input tensor for the chosen mode and validate availability.
if output in ("depth", "depth_colored", "normal_screen"):
if output in ("depth", "depth_colored"):
if "depth" not in geometry:
raise ValueError("MoGeGeometry has no depth output.")
src = geometry["depth"]
elif output == "normal":
elif is_normal:
if "normal" in geometry:
src = geometry["normal"]
elif "points" in geometry:
@ -330,11 +321,11 @@ class MoGeRender(io.ComfyNode):
d = _normalize_disparity(slc)
out.append(_turbo(d) if output == "depth_colored"
else d.unsqueeze(-1).expand(*d.shape, 3).contiguous())
elif output == "normal":
elif is_normal:
n = slc if "normal" in geometry else _normals_from_points(slc)
out.append((n * 0.5 + 0.5).clamp(0.0, 1.0))
elif output == "normal_screen":
n = _screen_normals_from_depth(slc)
# MoGe is OpenCV (Z+ into scene); normal-map convention is Z+ out of surface, so flip Z.
y_sign = -1.0 if opengl else 1.0
n = n * n.new_tensor([1.0, y_sign, -1.0])
out.append((n * 0.5 + 0.5).clamp(0.0, 1.0))
elif output == "mask":
out.append(slc.unsqueeze(-1).expand(*slc.shape, 3).contiguous())

View File

@ -1,3 +1,3 @@
# This file is automatically generated by the build process when version is
# updated in pyproject.toml.
__version__ = "0.21.0"
__version__ = "0.21.1"

View File

@ -1,6 +1,6 @@
[project]
name = "ComfyUI"
version = "0.21.0"
version = "0.21.1"
readme = "README.md"
license = { file = "LICENSE" }
requires-python = ">=3.10"

View File

@ -1,5 +1,5 @@
comfyui-frontend-package==1.43.18
comfyui-workflow-templates==0.9.73
comfyui-workflow-templates==0.9.75
comfyui-embedded-docs==0.5.0
torch
torchsde

View File

@ -1,9 +1,23 @@
from collections import defaultdict
import torch
from comfy.model_detection import detect_unet_config, model_config_from_unet_config
import comfy.supported_models
def _freeze(value):
"""Recursively convert a value to a hashable form so configs can be
compared/used as dict keys or set members."""
if isinstance(value, dict):
return frozenset((k, _freeze(v)) for k, v in value.items())
if isinstance(value, (list, tuple)):
return tuple(_freeze(v) for v in value)
if isinstance(value, set):
return frozenset(_freeze(v) for v in value)
return value
def _make_longcat_comfyui_sd():
"""Minimal ComfyUI-format state dict for pre-converted LongCat-Image weights."""
sd = {}
@ -110,3 +124,21 @@ class TestModelDetection:
model_config = model_config_from_unet_config(unet_config, sd)
assert model_config is not None
assert type(model_config).__name__ == "FluxSchnell"
def test_unet_config_and_required_keys_combination_is_unique(self):
"""Each model in the registry must have a unique combination of
``unet_config`` and ``required_keys``. If two models share the same
combination, ``BASE.matches`` cannot disambiguate between them and the
first one in the list will always win."""
models = comfy.supported_models.models
groups = defaultdict(list)
for model in models:
key = (_freeze(model.unet_config), _freeze(model.required_keys))
groups[key].append(model.__name__)
duplicates = {k: names for k, names in groups.items() if len(names) > 1}
assert not duplicates, (
"Found models sharing the same (unet_config, required_keys) "
"combination, which makes detection ambiguous: "
+ "; ".join(", ".join(names) for names in duplicates.values())
)