Merge branch 'Comfy-Org:master' into master

This commit is contained in:
azazeal04 2026-06-06 14:53:50 +02:00 committed by GitHub
commit c80ee71fc6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 599 additions and 107 deletions

View File

@ -1,5 +1,4 @@
As of the time of writing this you need this driver for best results: As of the time of writing this you need a recent driver. Updating to the latest driver is recommended.
https://www.amd.com/en/resources/support-articles/release-notes/RN-AMDGPU-WINDOWS-PYTORCH-7-1-1.html
HOW TO RUN: HOW TO RUN:
@ -7,9 +6,9 @@ If you have a AMD gpu:
run_amd_gpu.bat run_amd_gpu.bat
If you have memory issues you can try disabling the smart memory management by running comfyui with: If you have memory issues you can try enabling the new dynamic memory management by running comfyui with:
run_amd_gpu_disable_smart_memory.bat run_amd_gpu_enable_dynamic_vram.bat
IF YOU GET A RED ERROR IN THE UI MAKE SURE YOU HAVE A MODEL/CHECKPOINT IN: ComfyUI\models\checkpoints IF YOU GET A RED ERROR IN THE UI MAKE SURE YOU HAVE A MODEL/CHECKPOINT IN: ComfyUI\models\checkpoints

View File

@ -651,8 +651,7 @@ def ensure_pin_budget(size, evict_active=False):
to_free = shortfall + PIN_PRESSURE_HYSTERESIS to_free = shortfall + PIN_PRESSURE_HYSTERESIS
return free_pins(to_free, evict_active=evict_active) >= shortfall return free_pins(to_free, evict_active=evict_active) >= shortfall
def ensure_pin_registerable(size, evict_active=True): def free_registrations(shortfall, evict_active=True):
shortfall = TOTAL_PINNED_MEMORY + size - MAX_PINNED_MEMORY
if MAX_PINNED_MEMORY <= 0: if MAX_PINNED_MEMORY <= 0:
return False return False
if shortfall <= 0: if shortfall <= 0:
@ -674,6 +673,9 @@ def ensure_pin_registerable(size, evict_active=True):
return True return True
return shortfall <= REGISTERABLE_PIN_HYSTERESIS return shortfall <= REGISTERABLE_PIN_HYSTERESIS
def ensure_pin_registerable(size, evict_active=True):
    """Ask for enough registration headroom to pin ``size`` more.

    The shortfall is how far ``TOTAL_PINNED_MEMORY + size`` would exceed
    ``MAX_PINNED_MEMORY``; it is handed to ``free_registrations`` and that
    function's result is returned unchanged.
    """
    projected_shortfall = TOTAL_PINNED_MEMORY + size - MAX_PINNED_MEMORY
    return free_registrations(projected_shortfall, evict_active=evict_active)
class LoadedModel: class LoadedModel:
def __init__(self, model: ModelPatcher): def __init__(self, model: ModelPatcher):
self._set_model(model) self._set_model(model)

View File

@ -89,13 +89,26 @@ def pin_memory(module, subset="weights", size=None):
not comfy.model_management.ensure_pin_registerable(registerable_size)): not comfy.model_management.ensure_pin_registerable(registerable_size)):
return _steal_pin(module, stack, buckets, size, priority) return _steal_pin(module, stack, buckets, size, priority)
extended = False
try: try:
hostbuf.extend(size=size) hostbuf.extend(size=size, register=False)
extended = True
pin = comfy_aimdo.torch.hostbuf_to_tensor(hostbuf)[offset:offset + size]
pin.untyped_storage()._comfy_hostbuf = hostbuf
if torch.cuda.cudart().cudaHostRegister(pin.data_ptr(), size, 1) != 0:
comfy.model_management.discard_cuda_async_error()
comfy.model_management.free_registrations(size)
if torch.cuda.cudart().cudaHostRegister(pin.data_ptr(), size, 1) != 0:
comfy.model_management.discard_cuda_async_error()
del pin
hostbuf.truncate(offset, do_unregister=False)
return _steal_pin(module, stack, buckets, size, priority)
except RuntimeError: except RuntimeError:
if extended:
hostbuf.truncate(offset, do_unregister=False)
return _steal_pin(module, stack, buckets, size, priority) return _steal_pin(module, stack, buckets, size, priority)
module._pin = comfy_aimdo.torch.hostbuf_to_tensor(hostbuf)[offset:offset + size] module._pin = pin
module._pin.untyped_storage()._comfy_hostbuf = hostbuf
stack.append((module, offset)) stack.append((module, offset))
module._pin_registered = True module._pin_registered = True
module._pin_stack_index = len(stack) - 1 module._pin_stack_index = len(stack) - 1

View File

@ -755,6 +755,18 @@ class File3DKSPLAT(ComfyTypeIO):
Type = File3D Type = File3D
@comfytype(io_type="FILE_3D_SPLAT_ANY")
class File3DSplatAny(ComfyTypeIO):
    """General 3D Gaussian splat file type - accepts any supported splat container (.ply / .spz / .splat / .ksplat).

    Registered under the io_type "FILE_3D_SPLAT_ANY". ``Type`` is ``File3D``, the same
    value type the format-specific File3D* classes use.
    """
    Type = File3D
@comfytype(io_type="FILE_3D_POINT_CLOUD_ANY")
class File3DPointCloudAny(ComfyTypeIO):
    """General point cloud file type - accepts any supported point cloud container (currently .ply).

    Registered under the io_type "FILE_3D_POINT_CLOUD_ANY". ``Type`` is ``File3D``.
    """
    Type = File3D
@comfytype(io_type="HOOKS") @comfytype(io_type="HOOKS")
class Hooks(ComfyTypeIO): class Hooks(ComfyTypeIO):
if TYPE_CHECKING: if TYPE_CHECKING:
@ -2336,6 +2348,8 @@ __all__ = [
"File3DSPLAT", "File3DSPLAT",
"File3DSPZ", "File3DSPZ",
"File3DKSPLAT", "File3DKSPLAT",
"File3DSplatAny",
"File3DPointCloudAny",
"Hooks", "Hooks",
"HookKeyframes", "HookKeyframes",
"TimestepsRange", "TimestepsRange",

View File

@ -108,13 +108,19 @@ class GeminiVideoMetadata(BaseModel):
startOffset: GeminiOffset | None = Field(None) startOffset: GeminiOffset | None = Field(None)
class GeminiThinkingConfig(BaseModel):
    """Thinking options sent as the ``thinkingConfig`` member of a Gemini generation config."""

    # Optional; stays None unless the caller sets it.
    includeThoughts: bool | None = Field(None)
    # Required: Field(...) declares no default, so a value must always be supplied.
    thinkingLevel: str = Field(...)
class GeminiGenerationConfig(BaseModel): class GeminiGenerationConfig(BaseModel):
maxOutputTokens: int | None = Field(None, ge=16, le=8192) maxOutputTokens: int | None = Field(None, ge=16, le=65536)
seed: int | None = Field(None) seed: int | None = Field(None)
stopSequences: list[str] | None = Field(None) stopSequences: list[str] | None = Field(None)
temperature: float | None = Field(None, ge=0.0, le=2.0) temperature: float | None = Field(None, ge=0.0, le=2.0)
topK: int | None = Field(None, ge=1) topK: int | None = Field(None, ge=1)
topP: float | None = Field(None, ge=0.0, le=1.0) topP: float | None = Field(None, ge=0.0, le=1.0)
thinkingConfig: GeminiThinkingConfig | None = Field(None)
class GeminiImageOutputOptions(BaseModel): class GeminiImageOutputOptions(BaseModel):
@ -128,11 +134,6 @@ class GeminiImageConfig(BaseModel):
imageOutputOptions: GeminiImageOutputOptions = Field(default_factory=GeminiImageOutputOptions) imageOutputOptions: GeminiImageOutputOptions = Field(default_factory=GeminiImageOutputOptions)
class GeminiThinkingConfig(BaseModel):
includeThoughts: bool | None = Field(None)
thinkingLevel: str = Field(...)
class GeminiImageGenerationConfig(GeminiGenerationConfig): class GeminiImageGenerationConfig(GeminiGenerationConfig):
responseModalities: list[str] | None = Field(None) responseModalities: list[str] | None = Field(None)
imageConfig: GeminiImageConfig | None = Field(None) imageConfig: GeminiImageConfig | None = Field(None)

View File

@ -8,7 +8,7 @@ import os
from enum import Enum from enum import Enum
from fnmatch import fnmatch from fnmatch import fnmatch
from io import BytesIO from io import BytesIO
from typing import Literal from typing import Any, Literal
import torch import torch
from typing_extensions import override from typing_extensions import override
@ -19,6 +19,7 @@ from comfy_api_nodes.apis.gemini import (
GeminiContent, GeminiContent,
GeminiFileData, GeminiFileData,
GeminiGenerateContentRequest, GeminiGenerateContentRequest,
GeminiGenerationConfig,
GeminiGenerateContentResponse, GeminiGenerateContentResponse,
GeminiImageConfig, GeminiImageConfig,
GeminiImageGenerateContentRequest, GeminiImageGenerateContentRequest,
@ -40,13 +41,18 @@ from comfy_api_nodes.util import (
get_number_of_images, get_number_of_images,
sync_op, sync_op,
tensor_to_base64_string, tensor_to_base64_string,
upload_audio_to_comfyapi,
upload_image_to_comfyapi,
upload_images_to_comfyapi, upload_images_to_comfyapi,
upload_video_to_comfyapi,
validate_string, validate_string,
video_to_base64_string, video_to_base64_string,
) )
GEMINI_BASE_ENDPOINT = "/proxy/vertexai/gemini" GEMINI_BASE_ENDPOINT = "/proxy/vertexai/gemini"
GEMINI_MAX_INPUT_FILE_SIZE = 20 * 1024 * 1024 # 20 MB GEMINI_MAX_INPUT_FILE_SIZE = 20 * 1024 * 1024 # 20 MB
# Default number of media units build_gemini_media_parts uploads and references by URL (fileData parts).
GEMINI_URL_INPUT_BUDGET = 10
# Default cap on the summed base64 length of media that build_gemini_media_parts sends inline
# once the URL budget is used up.
GEMINI_MAX_INLINE_BYTES = 18 * 1024 * 1024
GEMINI_IMAGE_SYS_PROMPT = ( GEMINI_IMAGE_SYS_PROMPT = (
"You are an expert image-generation engine. You must ALWAYS produce an image.\n" "You are an expert image-generation engine. You must ALWAYS produce an image.\n"
"Interpret all user input—regardless of " "Interpret all user input—regardless of "
@ -285,6 +291,140 @@ def calculate_tokens_price(response: GeminiGenerateContentResponse) -> float | N
return final_price / 1_000_000.0 return final_price / 1_000_000.0
def create_video_parts(video_input: Input.Video) -> list[GeminiPart]:
    """Encode one video as base64 MP4/H.264 and return it as a one-element list of inline parts."""
    encoded_video = video_to_base64_string(
        video_input,
        container_format=Types.VideoContainer.MP4,
        codec=Types.VideoCodec.H264,
    )
    inline_payload = GeminiInlineData(mimeType=GeminiMimeType.video_mp4, data=encoded_video)
    return [GeminiPart(inlineData=inline_payload)]
def create_audio_parts(audio_input: Input.Audio) -> list[GeminiPart]:
    """Turn an audio input into inline MP3 parts, one part for each index of the waveform's first dimension."""

    def _encode_clip(clip_index: int) -> GeminiPart:
        # Rebuild a single-item audio input so the encoder sees exactly one clip.
        single_clip = Input.Audio(
            waveform=audio_input["waveform"][clip_index].unsqueeze(0),
            sample_rate=audio_input["sample_rate"],
        )
        # MP3 is the container/codec this module sends to the Gemini API for audio.
        encoded_clip = audio_to_base64_string(
            single_clip,
            container_format="mp3",
            codec_name="libmp3lame",
        )
        return GeminiPart(
            inlineData=GeminiInlineData(mimeType=GeminiMimeType.audio_mp3, data=encoded_clip)
        )

    clip_count = audio_input["waveform"].shape[0]
    return [_encode_clip(clip_index) for clip_index in range(clip_count)]
def _flatten_images(images: list[Input.Image]) -> list[torch.Tensor]:
"""Expand any batched image tensors into individual (H, W, C) frames, preserving order."""
frames: list[torch.Tensor] = []
for img in images:
if len(img.shape) == 4:
frames.extend(img[i] for i in range(img.shape[0]))
else:
frames.append(img)
return frames
def _flatten_audio(audios: list[Input.Audio]) -> list[Input.Audio]:
    """Split every audio input along the first waveform dimension into single-clip inputs, in order."""
    singles: list[Input.Audio] = []
    for batch in audios:
        samples = batch["waveform"]
        singles.extend(
            Input.Audio(waveform=samples[clip_index].unsqueeze(0), sample_rate=batch["sample_rate"])
            for clip_index in range(samples.shape[0])
        )
    return singles
async def _media_url_part(cls: type[IO.ComfyNode], kind: str, payload: Any) -> GeminiPart:
    """Upload one media unit and wrap the resulting URL in a fileData part.

    ``kind`` selects the uploader: "image" (PNG), "audio" (MP3); every other
    value goes through the video uploader.
    """
    if kind == "image":
        uri = await upload_image_to_comfyapi(cls, payload, mime_type="image/png", wait_label="Uploading image")
        mime = GeminiMimeType.image_png
    elif kind == "audio":
        uri = await upload_audio_to_comfyapi(
            cls,
            payload,
            container_format="mp3",
            codec_name="libmp3lame",
            mime_type="audio/mp3",
        )
        mime = GeminiMimeType.audio_mp3
    else:
        uri = await upload_video_to_comfyapi(cls, payload, wait_label="Uploading video")
        mime = GeminiMimeType.video_mp4
    return GeminiPart(fileData=GeminiFileData(mimeType=mime, fileUri=uri))
def _media_inline_part(kind: str, payload: Any) -> tuple[GeminiPart, int]:
    """Base64-encode one media unit as an inline part and report the encoded length.

    "image" is encoded as WebP and "audio" as MP3; every other ``kind`` is
    encoded as MP4/H.264 video. Returns ``(part, len(encoded_data))``.
    """

    def _wrap(encoded, mime_type) -> tuple[GeminiPart, int]:
        inline_part = GeminiPart(inlineData=GeminiInlineData(mimeType=mime_type, data=encoded))
        return inline_part, len(encoded)

    if kind == "image":
        return _wrap(tensor_to_base64_string(payload, mime_type="image/webp"), GeminiMimeType.image_webp)
    if kind == "audio":
        return _wrap(
            audio_to_base64_string(payload, container_format="mp3", codec_name="libmp3lame"),
            GeminiMimeType.audio_mp3,
        )
    return _wrap(
        video_to_base64_string(
            payload, container_format=Types.VideoContainer.MP4, codec=Types.VideoCodec.H264
        ),
        GeminiMimeType.video_mp4,
    )
async def build_gemini_media_parts(
    cls: type[IO.ComfyNode],
    images: list[Input.Image],
    audios: list[Input.Audio],
    videos: list[Input.Video],
    *,
    url_budget: int = GEMINI_URL_INPUT_BUDGET,
    max_inline_bytes: int = GEMINI_MAX_INLINE_BYTES,
) -> list[GeminiPart]:
    """Convert image, audio and video inputs into an ordered list of Gemini parts.

    Media units are processed videos first, then audio clips, then image frames
    (audio and images are flattened to single units beforehand). The first
    ``url_budget`` units are uploaded and referenced by URL (fileData); every
    later unit is base64-encoded inline. Putting the typically larger media
    first means any inline overflow falls on the smaller items.

    Raises:
        ValueError: when the summed base64 length of the inline units exceeds
            ``max_inline_bytes``.
    """
    ordered_units: list[tuple[str, Any]] = []
    ordered_units.extend(("video", clip) for clip in videos)
    ordered_units.extend(("audio", clip) for clip in _flatten_audio(audios))
    ordered_units.extend(("image", frame) for frame in _flatten_images(images))

    result: list[GeminiPart] = []
    inline_total = 0
    for position, (kind, payload) in enumerate(ordered_units):
        # Units are consumed in order, so the position doubles as the count of URL slots already used.
        if position < url_budget:
            result.append(await _media_url_part(cls, kind, payload))
        else:
            inline_part, encoded_len = _media_inline_part(kind, payload)
            inline_total += encoded_len
            if inline_total > max_inline_bytes:
                raise ValueError(
                    f"Too much media to send inline (over {max_inline_bytes // (1024 * 1024)}MB after the first "
                    f"{url_budget} inputs are uploaded as URLs). Reduce the number or size of attached media."
                )
            result.append(inline_part)
    return result
class GeminiNode(IO.ComfyNode): class GeminiNode(IO.ComfyNode):
""" """
Node to generate text responses from a Gemini model. Node to generate text responses from a Gemini model.
@ -407,58 +547,9 @@ class GeminiNode(IO.ComfyNode):
) )
""", """,
), ),
is_deprecated=True,
) )
@classmethod
def create_video_parts(cls, video_input: Input.Video) -> list[GeminiPart]:
"""Convert video input to Gemini API compatible parts."""
base_64_string = video_to_base64_string(
video_input, container_format=Types.VideoContainer.MP4, codec=Types.VideoCodec.H264
)
return [
GeminiPart(
inlineData=GeminiInlineData(
mimeType=GeminiMimeType.video_mp4,
data=base_64_string,
)
)
]
@classmethod
def create_audio_parts(cls, audio_input: Input.Audio) -> list[GeminiPart]:
"""
Convert audio input to Gemini API compatible parts.
Args:
audio_input: Audio input from ComfyUI, containing waveform tensor and sample rate.
Returns:
List of GeminiPart objects containing the encoded audio.
"""
audio_parts: list[GeminiPart] = []
for batch_index in range(audio_input["waveform"].shape[0]):
# Recreate an IO.AUDIO object for the given batch dimension index
audio_at_index = Input.Audio(
waveform=audio_input["waveform"][batch_index].unsqueeze(0),
sample_rate=audio_input["sample_rate"],
)
# Convert to MP3 format for compatibility with Gemini API
audio_bytes = audio_to_base64_string(
audio_at_index,
container_format="mp3",
codec_name="libmp3lame",
)
audio_parts.append(
GeminiPart(
inlineData=GeminiInlineData(
mimeType=GeminiMimeType.audio_mp3,
data=audio_bytes,
)
)
)
return audio_parts
@classmethod @classmethod
async def execute( async def execute(
cls, cls,
@ -482,9 +573,9 @@ class GeminiNode(IO.ComfyNode):
if images is not None: if images is not None:
parts.extend(await create_image_parts(cls, images)) parts.extend(await create_image_parts(cls, images))
if audio is not None: if audio is not None:
parts.extend(cls.create_audio_parts(audio)) parts.extend(create_audio_parts(audio))
if video is not None: if video is not None:
parts.extend(cls.create_video_parts(video)) parts.extend(create_video_parts(video))
if files is not None: if files is not None:
parts.extend(files) parts.extend(files)
@ -512,6 +603,210 @@ class GeminiNode(IO.ComfyNode):
return IO.NodeOutput(output_text or "Empty response from Gemini model...") return IO.NodeOutput(output_text or "Empty response from Gemini model...")
# Display name shown in GeminiNodeV2's model selector -> model ID appended to the request endpoint path.
GEMINI_V2_MODELS: dict[str, str] = {
    "Gemini 3.1 Pro": "gemini-3.1-pro-preview",
    "Gemini 3.1 Flash-Lite": "gemini-3.1-flash-lite-preview",
}
def _gemini_text_model_inputs(thinking_default: str) -> list[Input]:
    """Build the inputs revealed under one option of the model DynamicCombo.

    Every option gets the same media slots, file input, thinking-level selector
    and sampling controls, in the same order; ``thinking_default`` is the only
    thing that varies (default of the ``thinking_level`` combo).
    """

    def _media_slot(slot_id, template_input, slot_names, tooltip):
        # Autogrow group that may be left completely empty (min=0).
        return IO.Autogrow.Input(
            slot_id,
            template=IO.Autogrow.TemplateNames(template_input, names=slot_names, min=0),
            tooltip=tooltip,
        )

    image_slots = _media_slot(
        "images",
        IO.Image.Input("image"),
        [f"image_{i}" for i in range(1, 17)],
        "Optional image(s) to use as context for the model. Up to 16 images.",
    )
    audio_slot = _media_slot(
        "audio",
        IO.Audio.Input("audio"),
        ["audio_1"],
        "Optional audio clip to use as context for the model.",
    )
    video_slot = _media_slot(
        "video",
        IO.Video.Input("video"),
        ["video_1"],
        "Optional video clip to use as context for the model.",
    )
    files_input = IO.Custom("GEMINI_INPUT_FILES").Input(
        "files",
        optional=True,
        tooltip="Optional file(s) to use as context for the model. "
        "Accepts inputs from the Gemini Input Files node.",
    )
    thinking_input = IO.Combo.Input(
        "thinking_level",
        options=["LOW", "HIGH"],
        default=thinking_default,
        tooltip="How hard the model reasons internally before answering. "
        "HIGH improves quality on difficult tasks but costs more (thinking) tokens and is slower.",
    )
    temperature_input = IO.Float.Input(
        "temperature",
        default=1.0,
        min=0.0,
        max=2.0,
        step=0.01,
        tooltip="Controls randomness. Lower is more focused/deterministic, higher is more creative.",
        advanced=True,
    )
    top_p_input = IO.Float.Input(
        "top_p",
        default=0.95,
        min=0.0,
        max=1.0,
        step=0.01,
        tooltip="Nucleus sampling: sample from the smallest token set whose cumulative probability reaches top_p.",
        advanced=True,
    )
    max_tokens_input = IO.Int.Input(
        "max_output_tokens",
        default=32768,
        min=16,
        max=65536,
        tooltip="Maximum tokens to generate, including the model's internal thinking. "
        "With thinking_level HIGH, a low value can leave no room for the answer; raise this if "
        "responses come back empty or truncated. The model stops early when finished, so a higher "
        "cap costs nothing extra for short replies.",
        advanced=True,
    )
    # List order is the order in which the inputs are declared to the schema.
    return [
        image_slots,
        audio_slot,
        video_slot,
        files_input,
        thinking_input,
        temperature_input,
        top_p_input,
        max_tokens_input,
    ]
class GeminiNodeV2(IO.ComfyNode):
    """Text-generation node for Gemini models with per-model multimodal inputs.

    The model is picked through a DynamicCombo; the selected option's inputs
    (media, files, thinking level, sampling controls) reach ``execute`` inside
    the ``model`` dict rather than as separate arguments.
    """

    @classmethod
    def define_schema(cls):
        """Declare the node's inputs, its single string output, hidden auth fields and price badge."""
        return IO.Schema(
            node_id="GeminiNodeV2",
            display_name="Google Gemini",
            category="partner/text/Gemini",
            essentials_category="Text Generation",
            description="Generate text responses with Google's Gemini models. Provide a text prompt and, "
            "optionally, one or more images, audio clips, videos, or files as multimodal context.",
            inputs=[
                IO.String.Input(
                    "prompt",
                    multiline=True,
                    default="",
                    tooltip="Text input to the model. Include detailed instructions, questions, or context.",
                ),
                # Each option carries its own copy of the shared inputs; only the thinking_level default differs.
                IO.DynamicCombo.Input(
                    "model",
                    options=[
                        IO.DynamicCombo.Option("Gemini 3.1 Pro", _gemini_text_model_inputs("HIGH")),
                        IO.DynamicCombo.Option("Gemini 3.1 Flash-Lite", _gemini_text_model_inputs("LOW")),
                    ],
                    tooltip="The Gemini model used to generate the response.",
                ),
                IO.Int.Input(
                    "seed",
                    default=42,
                    min=0,
                    max=2147483647,
                    control_after_generate=True,
                    tooltip="Seed for sampling. Set to 0 for a random seed. Deterministic output isn't guaranteed.",
                ),
                IO.String.Input(
                    "system_prompt",
                    multiline=True,
                    default="",
                    optional=True,
                    advanced=True,
                    tooltip="Foundational instructions that dictate the model's behavior.",
                ),
            ],
            outputs=[
                IO.String.Output(),
            ],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
            # The badge expression shows one price range when the model widget value contains "lite"
            # and a higher range otherwise.
            price_badge=IO.PriceBadge(
                depends_on=IO.PriceBadgeDepends(widgets=["model"]),
                expr="""
(
$m := widgets.model;
$contains($m, "lite") ? {
"type": "list_usd",
"usd": [0.00025, 0.0015],
"format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" }
} : {
"type": "list_usd",
"usd": [0.002, 0.012],
"format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" }
}
)
""",
            ),
        )

    @classmethod
    async def execute(
        cls,
        prompt: str,
        model: dict,
        seed: int,
        system_prompt: str = "",
    ) -> IO.NodeOutput:
        """Send the prompt plus any attached media/files to the selected Gemini model.

        Args:
            prompt: User text; validated to be non-empty after stripping whitespace.
            model: DynamicCombo value. ``model["model"]`` is the display name; the remaining
                keys read here are "images", "audio", "video", "files", "temperature",
                "top_p", "max_output_tokens" and "thinking_level".
            seed: Sampling seed; values that are not greater than 0 are sent as no seed.
            system_prompt: Optional system instruction; omitted from the request when empty.

        Returns:
            NodeOutput holding the response text, or a placeholder message when the
            extracted text is empty.

        Raises:
            KeyError: if ``model["model"]`` is not a key of ``GEMINI_V2_MODELS``.
        """
        validate_string(prompt, strip_whitespace=True, min_length=1)
        model_id = GEMINI_V2_MODELS[model["model"]]

        parts: list[GeminiPart] = [GeminiPart(text=prompt)]
        # Each media group may be absent or None; unset slots inside a group (None values) are dropped.
        images = [t for t in (model.get("images") or {}).values() if t is not None]
        audios = [a for a in (model.get("audio") or {}).values() if a is not None]
        videos = [v for v in (model.get("video") or {}).values() if v is not None]
        if images or audios or videos:
            parts.extend(await build_gemini_media_parts(cls, images, audios, videos))
        files = model.get("files")
        if files is not None:
            parts.extend(files)

        gemini_system_prompt = None
        if system_prompt:
            gemini_system_prompt = GeminiSystemInstructionContent(parts=[GeminiTextPart(text=system_prompt)], role=None)

        response = await sync_op(
            cls,
            endpoint=ApiEndpoint(path=f"{GEMINI_BASE_ENDPOINT}/{model_id}", method="POST"),
            data=GeminiGenerateContentRequest(
                contents=[
                    GeminiContent(
                        role=GeminiRole.user,
                        parts=parts,
                    )
                ],
                generationConfig=GeminiGenerationConfig(
                    temperature=model["temperature"],
                    topP=model["top_p"],
                    maxOutputTokens=model["max_output_tokens"],
                    # 0 means "random seed" in the UI, so no seed is sent in that case.
                    seed=seed if seed > 0 else None,
                    thinkingConfig=GeminiThinkingConfig(thinkingLevel=model["thinking_level"]),
                ),
                systemInstruction=gemini_system_prompt,
            ),
            response_model=GeminiGenerateContentResponse,
            price_extractor=calculate_tokens_price,
        )

        output_text = get_text_from_response(response)
        return IO.NodeOutput(output_text or "Empty response from Gemini model...")
class GeminiInputFiles(IO.ComfyNode): class GeminiInputFiles(IO.ComfyNode):
""" """
Loads and formats input files for use with the Gemini API. Loads and formats input files for use with the Gemini API.
@ -1129,6 +1424,26 @@ class GeminiNanoBanana2V2(IO.ComfyNode):
tooltip="Foundational instructions that dictate an AI's behavior.", tooltip="Foundational instructions that dictate an AI's behavior.",
advanced=True, advanced=True,
), ),
IO.Float.Input(
"temperature",
default=1.0,
min=0.0,
max=2.0,
step=0.01,
optional=True,
tooltip="Controls randomness in generation. Lower is more focused/deterministic.",
advanced=True,
),
IO.Float.Input(
"top_p",
default=0.95,
min=0.0,
max=1.0,
step=0.01,
optional=True,
tooltip="Nucleus sampling threshold. Lower is more focused, higher more diverse.",
advanced=True,
),
], ],
outputs=[ outputs=[
IO.Image.Output(), IO.Image.Output(),
@ -1165,6 +1480,8 @@ class GeminiNanoBanana2V2(IO.ComfyNode):
seed: int, seed: int,
response_modalities: str, response_modalities: str,
system_prompt: str = "", system_prompt: str = "",
temperature: float = 1.0,
top_p: float = 0.95,
) -> IO.NodeOutput: ) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=True, min_length=1) validate_string(prompt, strip_whitespace=True, min_length=1)
model_choice = model["model"] model_choice = model["model"]
@ -1204,6 +1521,8 @@ class GeminiNanoBanana2V2(IO.ComfyNode):
responseModalities=(["IMAGE"] if response_modalities == "IMAGE" else ["TEXT", "IMAGE"]), responseModalities=(["IMAGE"] if response_modalities == "IMAGE" else ["TEXT", "IMAGE"]),
imageConfig=image_config, imageConfig=image_config,
thinkingConfig=GeminiThinkingConfig(thinkingLevel=model["thinking_level"]), thinkingConfig=GeminiThinkingConfig(thinkingLevel=model["thinking_level"]),
temperature=temperature,
topP=top_p,
), ),
systemInstruction=gemini_system_prompt, systemInstruction=gemini_system_prompt,
), ),
@ -1222,6 +1541,7 @@ class GeminiExtension(ComfyExtension):
async def get_node_list(self) -> list[type[IO.ComfyNode]]: async def get_node_list(self) -> list[type[IO.ComfyNode]]:
return [ return [
GeminiNode, GeminiNode,
GeminiNodeV2,
GeminiImage, GeminiImage,
GeminiImage2, GeminiImage2,
GeminiNanoBanana2, GeminiNanoBanana2,

View File

@ -488,7 +488,7 @@ class SplatToFile3D(IO.ComfyNode):
"spz: Niantic gzip-compressed (~10x smaller), base color only " "spz: Niantic gzip-compressed (~10x smaller), base color only "
), ),
], ],
outputs=[IO.File3DAny.Output(display_name="model_3d")], outputs=[IO.File3DSplatAny.Output(display_name="model_3d")],
) )
@classmethod @classmethod
@ -516,7 +516,7 @@ class File3DToSplat(IO.ComfyNode):
inputs=[ inputs=[
IO.MultiType.Input( IO.MultiType.Input(
IO.File3DAny.Input("model_3d"), IO.File3DAny.Input("model_3d"),
types=[IO.File3DPLY, IO.File3DSPLAT, IO.File3DKSPLAT, IO.File3DSPZ], types=[IO.File3DSplatAny, IO.File3DPLY, IO.File3DSPLAT, IO.File3DKSPLAT, IO.File3DSPZ],
tooltip="A gaussian splat 3D file", tooltip="A gaussian splat 3D file",
), ),
], ],

View File

@ -51,6 +51,14 @@ class Load3D(IO.ComfyNode):
], ],
) )
@classmethod
def validate_inputs(cls, model_file, **kwargs) -> bool | str:
if not model_file or model_file == "none":
return True
if not folder_paths.exists_annotated_filepath(model_file):
return f"Invalid 3D model file: {model_file}"
return True
@classmethod @classmethod
def execute(cls, model_file, image, **kwargs) -> IO.NodeOutput: def execute(cls, model_file, image, **kwargs) -> IO.NodeOutput:
image_path = folder_paths.get_annotated_filepath(image['image']) image_path = folder_paths.get_annotated_filepath(image['image'])
@ -136,7 +144,7 @@ class Preview3DAdvanced(IO.ComfyNode):
is_output_node=True, is_output_node=True,
inputs=[ inputs=[
IO.MultiType.Input( IO.MultiType.Input(
"model_file", "model_3d",
types=[ types=[
IO.File3DGLB, IO.File3DGLB,
IO.File3DGLTF, IO.File3DGLTF,
@ -148,34 +156,161 @@ class Preview3DAdvanced(IO.ComfyNode):
], ],
tooltip="3D model file from an upstream 3D node.", tooltip="3D model file from an upstream 3D node.",
), ),
IO.Load3D.Input("image"),
IO.Load3DCamera.Input("camera_info", optional=True, advanced=True),
IO.Load3DModelInfo.Input("model_3d_info", optional=True, advanced=True), IO.Load3DModelInfo.Input("model_3d_info", optional=True, advanced=True),
IO.Load3D.Input("viewport_state"),
IO.Load3DCamera.Input("camera_info", optional=True, advanced=True),
IO.Int.Input("width", default=1024, min=1, max=4096, step=1), IO.Int.Input("width", default=1024, min=1, max=4096, step=1),
IO.Int.Input("height", default=1024, min=1, max=4096, step=1), IO.Int.Input("height", default=1024, min=1, max=4096, step=1),
], ],
outputs=[ outputs=[
IO.File3DAny.Output(display_name="model_file"), IO.File3DAny.Output(display_name="model_3d"),
IO.Load3DCamera.Output(display_name="camera_info"),
IO.Load3DModelInfo.Output(display_name="model_3d_info"), IO.Load3DModelInfo.Output(display_name="model_3d_info"),
IO.Load3DCamera.Output(display_name="camera_info"),
IO.Int.Output(display_name="width"), IO.Int.Output(display_name="width"),
IO.Int.Output(display_name="height"), IO.Int.Output(display_name="height"),
], ],
) )
@classmethod @classmethod
def execute(cls, model_file: Types.File3D, image, width: int, height: int, **kwargs) -> IO.NodeOutput: def execute(cls, model_3d: Types.File3D, viewport_state, width: int, height: int, **kwargs) -> IO.NodeOutput:
filename = f"preview3d_advanced_{uuid.uuid4().hex}.{model_file.format}" filename = f"preview3d_advanced_{uuid.uuid4().hex}.{model_3d.format}"
model_file.save_to(os.path.join(folder_paths.get_output_directory(), filename)) model_3d.save_to(os.path.join(folder_paths.get_temp_directory(), filename))
camera_info_input = kwargs.get("camera_info", None) camera_info_input = kwargs.get("camera_info", None)
camera_info = camera_info_input if camera_info_input is not None else image['camera_info'] camera_info = camera_info_input if camera_info_input is not None else viewport_state['camera_info']
model_3d_info_input = kwargs.get("model_3d_info", None) model_3d_info_input = kwargs.get("model_3d_info", None)
model_3d_info = model_3d_info_input if model_3d_info_input is not None else image.get('model_3d_info', []) model_3d_info = model_3d_info_input if model_3d_info_input is not None else viewport_state.get('model_3d_info', [])
return IO.NodeOutput( return IO.NodeOutput(
model_file, model_3d,
camera_info,
model_3d_info, model_3d_info,
camera_info,
width,
height,
ui=UI.PreviewUI3DAdvanced(filename, camera_info, model_3d_info),
)
class PreviewGaussianSplat(IO.ComfyNode):
    """Output node that previews a gaussian splat file and passes it, with camera/model info and size, downstream."""

    @classmethod
    def define_schema(cls):
        """Declare the node's search aliases, accepted splat input types, viewport inputs and outputs."""
        return IO.Schema(
            node_id="PreviewGaussianSplat",
            display_name="Preview Splat",
            category="3d",
            is_experimental=True,
            is_output_node=True,
            search_aliases=[
                "view splat",
                "view gaussian",
                "view gaussian splat",
                "preview gaussian",
                "preview gaussian splat",
                "view 3dgs",
                "preview 3dgs",
                "preview ply",
                "preview spz",
                "preview splat",
                "preview ksplat",
            ],
            inputs=[
                # Accepts the general splat type as well as each format-specific File3D type.
                IO.MultiType.Input(
                    "model_3d",
                    types=[
                        IO.File3DSplatAny,
                        IO.File3DPLY,
                        IO.File3DSPLAT,
                        IO.File3DSPZ,
                        IO.File3DKSPLAT,
                    ],
                    tooltip="A gaussian splat 3D file.",
                ),
                IO.Load3DModelInfo.Input("model_3d_info", optional=True, advanced=True),
                IO.Load3D.Input("viewport_state"),
                IO.Load3DCamera.Input("camera_info", optional=True, advanced=True),
                IO.Int.Input("width", default=1024, min=1, max=4096, step=1),
                IO.Int.Input("height", default=1024, min=1, max=4096, step=1),
            ],
            outputs=[
                IO.File3DSplatAny.Output(display_name="model_3d"),
                IO.Load3DModelInfo.Output(display_name="model_3d_info"),
                IO.Load3DCamera.Output(display_name="camera_info"),
                IO.Int.Output(display_name="width"),
                IO.Int.Output(display_name="height"),
            ],
        )

    @classmethod
    def execute(cls, model_3d: Types.File3D, viewport_state, width: int, height: int, **kwargs) -> IO.NodeOutput:
        """Save the splat to the temp directory for preview and forward the inputs as outputs.

        ``camera_info`` and ``model_3d_info`` are taken from ``kwargs`` when provided (not None);
        otherwise they fall back to the values stored in ``viewport_state``.
        """
        # Random hex suffix gives every preview its own file name; the extension comes from the file's format.
        filename = f"preview_splat_{uuid.uuid4().hex}.{model_3d.format}"
        model_3d.save_to(os.path.join(folder_paths.get_temp_directory(), filename))
        camera_info_input = kwargs.get("camera_info", None)
        # Fallback indexes viewport_state directly, so a missing 'camera_info' key raises KeyError.
        camera_info = camera_info_input if camera_info_input is not None else viewport_state['camera_info']
        model_3d_info_input = kwargs.get("model_3d_info", None)
        # Fallback tolerates a missing key and yields an empty list.
        model_3d_info = model_3d_info_input if model_3d_info_input is not None else viewport_state.get('model_3d_info', [])
        return IO.NodeOutput(
            model_3d,
            model_3d_info,
            camera_info,
            width,
            height,
            ui=UI.PreviewUI3DAdvanced(filename, camera_info, model_3d_info),
        )
class PreviewPointCloud(IO.ComfyNode):
@classmethod
def define_schema(cls):
return IO.Schema(
node_id="PreviewPointCloud",
display_name="Preview Point Cloud",
category="3d",
is_experimental=True,
is_output_node=True,
search_aliases=[
"view point cloud",
"view pointcloud",
"preview point cloud",
"preview pointcloud",
"preview ply",
],
inputs=[
IO.MultiType.Input(
"model_3d",
types=[
IO.File3DPointCloudAny,
IO.File3DPLY,
],
tooltip="Point cloud file (.ply)",
),
IO.Load3DModelInfo.Input("model_3d_info", optional=True, advanced=True),
IO.Load3D.Input("viewport_state"),
IO.Load3DCamera.Input("camera_info", optional=True, advanced=True),
IO.Int.Input("width", default=1024, min=1, max=4096, step=1),
IO.Int.Input("height", default=1024, min=1, max=4096, step=1),
],
outputs=[
IO.File3DPointCloudAny.Output(display_name="model_3d"),
IO.Load3DModelInfo.Output(display_name="model_3d_info"),
IO.Load3DCamera.Output(display_name="camera_info"),
IO.Int.Output(display_name="width"),
IO.Int.Output(display_name="height"),
],
)
@classmethod
def execute(cls, model_3d: Types.File3D, viewport_state, width: int, height: int, **kwargs) -> IO.NodeOutput:
filename = f"preview_pointcloud_{uuid.uuid4().hex}.{model_3d.format}"
model_3d.save_to(os.path.join(folder_paths.get_temp_directory(), filename))
camera_info_input = kwargs.get("camera_info", None)
camera_info = camera_info_input if camera_info_input is not None else viewport_state['camera_info']
model_3d_info_input = kwargs.get("model_3d_info", None)
model_3d_info = model_3d_info_input if model_3d_info_input is not None else viewport_state.get('model_3d_info', [])
return IO.NodeOutput(
model_3d,
model_3d_info,
camera_info,
width, width,
height, height,
ui=UI.PreviewUI3DAdvanced(filename, camera_info, model_3d_info), ui=UI.PreviewUI3DAdvanced(filename, camera_info, model_3d_info),
@ -189,6 +324,8 @@ class Load3DExtension(ComfyExtension):
Load3D, Load3D,
Preview3D, Preview3D,
Preview3DAdvanced, Preview3DAdvanced,
PreviewGaussianSplat,
PreviewPointCloud,
] ]

View File

@ -337,6 +337,12 @@ class SaveGLB(IO.ComfyNode):
IO.File3DFBX, IO.File3DFBX,
IO.File3DSTL, IO.File3DSTL,
IO.File3DUSDZ, IO.File3DUSDZ,
IO.File3DPLY,
IO.File3DSPLAT,
IO.File3DSPZ,
IO.File3DKSPLAT,
IO.File3DSplatAny,
IO.File3DPointCloudAny,
IO.File3DAny, IO.File3DAny,
], ],
tooltip="Mesh or 3D file to save", tooltip="Mesh or 3D file to save",

View File

@ -23,7 +23,7 @@ SQLAlchemy>=2.0.0
filelock filelock
av>=16.0.0 av>=16.0.0
comfy-kitchen==0.2.10 comfy-kitchen==0.2.10
comfy-aimdo==0.4.8 comfy-aimdo==0.4.9
requests requests
simpleeval>=1.0.0 simpleeval>=1.0.0
blake3 blake3