From 7b7c5fed7ce978b05da27b13e26ef340d284b60e Mon Sep 17 00:00:00 2001 From: Alexis Rolland Date: Thu, 21 May 2026 14:39:30 +0800 Subject: [PATCH 1/8] Update MediaPipe nodes to standardize with existing code base (CORE-242) (#14025) --- comfy_extras/nodes_mediapipe.py | 35 +++++++++++-------- folder_paths.py | 2 +- .../put_detection_models_here} | 0 3 files changed, 22 insertions(+), 15 deletions(-) rename models/{mediapipe/put_mediapipe_models_here => detection/put_detection_models_here} (100%) diff --git a/comfy_extras/nodes_mediapipe.py b/comfy_extras/nodes_mediapipe.py index 2e67ae83f..6b7916aee 100644 --- a/comfy_extras/nodes_mediapipe.py +++ b/comfy_extras/nodes_mediapipe.py @@ -28,7 +28,7 @@ from comfy_extras.mediapipe.face_landmarker import FaceLandmarker from comfy_extras.mediapipe.face_geometry import transformation_matrix_from_detection -FaceLandmarkerType = io.Custom("FACE_LANDMARKER") +FaceDetectionType = io.Custom("FACE_DETECTION_MODEL") FaceLandmarksType = io.Custom("FACE_LANDMARKS") _CANONICAL_KEYS = ("canonical_vertices", "procrustes_indices", "procrustes_weights") @@ -204,18 +204,19 @@ class LoadMediaPipeFaceLandmarker(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="LoadMediaPipeFaceLandmarker", - display_name="Load MediaPipe Face Landmarker", + search_aliases=["face", "facial", "mediapipe", "face landmark", "face mesh", "blazeface", "face detection"], + display_name="Load Face Detection Model (MediaPipe)", category="loaders", inputs=[ - io.Combo.Input("model_name", options=folder_paths.get_filename_list("mediapipe"), - tooltip="Face Landmarker safetensors from models/mediapipe/."), + io.Combo.Input("model_name", options=folder_paths.get_filename_list("detection"), + tooltip="Face detection model from models/detection/."), ], - outputs=[FaceLandmarkerType.Output()], + outputs=[FaceDetectionType.Output()], ) @classmethod def execute(cls, model_name) -> io.NodeOutput: - sd = comfy.utils.load_torch_file(folder_paths.get_full_path_or_raise("mediapipe", model_name), safe_load=True) + sd = comfy.utils.load_torch_file(folder_paths.get_full_path_or_raise("detection", model_name), safe_load=True) wrapper = FaceLandmarkerModel(sd) return io.NodeOutput(wrapper) @@ -234,10 +235,12 @@ class MediaPipeFaceLandmarker(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="MediaPipeFaceLandmarker", - display_name="MediaPipe Face Landmarker", + search_aliases=["face", "facial", "mediapipe", "face landmark", "face mesh", "blazeface", "face detection"], + display_name="Detect Face Landmarks (MediaPipe)", category="image/detection", + description="Detects facial landmarks using MediaPipe model.", inputs=[ - FaceLandmarkerType.Input("face_landmarker"), + FaceDetectionType.Input("face_detection_model"), io.Image.Input("image"), io.Combo.Input("detector_variant", options=["short", "full", "both"], default="short", tooltip="Face detector range. 'short' is tuned for close-up faces " @@ -261,9 +264,9 @@ class MediaPipeFaceLandmarker(io.ComfyNode): ) @classmethod - def execute(cls, face_landmarker, image, detector_variant, num_faces, min_confidence, + def execute(cls, face_detection_model, image, detector_variant, num_faces, min_confidence, missing_frame_fallback) -> io.NodeOutput: - canonical = face_landmarker.canonical_data + canonical = face_detection_model.canonical_data img_np = _image_to_uint8(image) B, H, W = img_np.shape[:3] chunk = 16 @@ -276,7 +279,7 @@ class MediaPipeFaceLandmarker(io.ComfyNode): with tqdm(total=B, desc=f"MediaPipe Face Landmarker ({variant})") as tq: for i in range(0, B, chunk): end = min(i + chunk, B) - res.extend(face_landmarker.detect_batch( + res.extend(face_detection_model.detect_batch( [img_np[bi] for bi in range(i, end)], num_faces=int(num_faces), score_thresh=float(min_confidence), @@ -306,7 +309,7 @@ class MediaPipeFaceLandmarker(io.ComfyNode): per_bb.append({"x": x1, "y": y1, "width": x2 - x1, "height": y2 - y1, "label": "face", "score": float(f["score"])}) bboxes.append(per_bb) return io.NodeOutput({"frames": frames, "image_size": (H, W), - "connection_sets": face_landmarker.connection_sets}, bboxes) + "connection_sets": face_detection_model.connection_sets}, bboxes) # Topology keys unioned by the 'all' connections preset (contour parts + irises + nose). @@ -332,8 +335,10 @@ class MediaPipeFaceMeshVisualize(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="MediaPipeFaceMeshVisualize", - display_name="MediaPipe Face Mesh Visualize", + search_aliases=["face", "facial", "mediapipe", "face landmark", "face mesh", "blazeface", "face detection", "visualize"], + display_name="Visualize Face Landmarks (MediaPipe)", category="image/detection", + description="Draws face landmarks mesh on the input image.", inputs=[ FaceLandmarksType.Input("face_landmarks"), io.Image.Input("image", optional=True, tooltip="If not connected, a black canvas will be used."), @@ -443,8 +448,10 @@ class MediaPipeFaceMask(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="MediaPipeFaceMask", - display_name="MediaPipe Face Mask", + search_aliases=["face", "facial", "mediapipe", "face mask", "blazeface", "face detection", "visualize"], + display_name="Draw Face Mask (MediaPipe)", category="image/detection", + description="Draws a mask from face landmarks.", inputs=[ FaceLandmarksType.Input("face_landmarks"), io.DynamicCombo.Input( diff --git a/folder_paths.py b/folder_paths.py index ce152eb37..36d61fcd0 100644 --- a/folder_paths.py +++ b/folder_paths.py @@ -60,7 +60,7 @@ folder_names_and_paths["geometry_estimation"] = ([os.path.join(models_dir, "geom folder_names_and_paths["optical_flow"] = ([os.path.join(models_dir, "optical_flow")], supported_pt_extensions) -folder_names_and_paths["mediapipe"] = ([os.path.join(models_dir, "mediapipe")], supported_pt_extensions) +folder_names_and_paths["detection"] = ([os.path.join(models_dir, "detection")], supported_pt_extensions) output_directory = os.path.join(base_path, "output") temp_directory = os.path.join(base_path, "temp") diff --git a/models/mediapipe/put_mediapipe_models_here b/models/detection/put_detection_models_here similarity index 100% rename from models/mediapipe/put_mediapipe_models_here rename to models/detection/put_detection_models_here From af3d9b60afddbe6f7c82e31ee688f7f5c9af39d0 Mon Sep 17 00:00:00 2001 From: Alexis Rolland Date: Thu, 21 May 2026 15:14:16 +0800 Subject: [PATCH 2/8] chore: Dataset nodes clean-up (CORE-237) (#14002) --- comfy_extras/nodes_audio.py | 7 +- comfy_extras/nodes_dataset.py | 188 ++++++++++++++++++++++---------- comfy_extras/nodes_hunyuan3d.py | 9 +- comfy_extras/nodes_images.py | 3 +- comfy_extras/nodes_lt_audio.py | 8 +- 5 files changed, 145 insertions(+), 70 deletions(-) diff --git a/comfy_extras/nodes_audio.py b/comfy_extras/nodes_audio.py index 2d6b3c7ea..d5084497e 100644 --- a/comfy_extras/nodes_audio.py +++ b/comfy_extras/nodes_audio.py @@ -543,7 +543,7 @@ class AudioConcat(IO.ComfyNode): return IO.Schema( node_id="AudioConcat", search_aliases=["join audio", "combine audio", "append audio"], - display_name="Audio Concat", + display_name="Concatenate Audio", description="Concatenates the audio1 to audio2 in the specified direction.", category="audio", inputs=[ @@ -597,7 +597,7 @@ class AudioMerge(IO.ComfyNode): return IO.Schema( node_id="AudioMerge", search_aliases=["mix audio", "overlay audio", "layer audio"], - display_name="Audio Merge", + display_name="Merge Audio", description="Combine two audio tracks by overlaying their waveforms.", category="audio", inputs=[ @@ -667,8 +667,9 @@ class AudioAdjustVolume(IO.ComfyNode): return IO.Schema( node_id="AudioAdjustVolume", search_aliases=["audio gain", "loudness", "audio level"], - display_name="Audio Adjust Volume", + display_name="Adjust Audio Volume", category="audio", + description="Adjust the volume of the audio by a specified amount in decibels (dB).", inputs=[ IO.Audio.Input("audio"), IO.Int.Input( diff --git a/comfy_extras/nodes_dataset.py b/comfy_extras/nodes_dataset.py index 98ed25d7e..22f5ff203 100644 --- a/comfy_extras/nodes_dataset.py +++ b/comfy_extras/nodes_dataset.py @@ -47,8 +47,10 @@ class LoadImageDataSetFromFolderNode(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="LoadImageDataSetFromFolder", - display_name="Load Image Dataset from Folder", - category="dataset", + search_aliases=["load folder", "load from folder", "load dataset", "load images", "import dataset"], + display_name="Load Image (from Folder)", + category="image", + description="Load a dataset of images from a specified folder and return a list of images. Supported formats: PNG, JPG, JPEG, WEBP.", is_experimental=True, inputs=[ io.Combo.Input( @@ -84,14 +86,16 @@ class LoadImageTextDataSetFromFolderNode(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="LoadImageTextDataSetFromFolder", - display_name="Load Image and Text Dataset from Folder", - category="dataset", + search_aliases=["load folder", "load from folder", "load dataset", "load images", "import dataset"], + display_name="Load Image-Text (from Folder)", + category="image", + description="Load a dataset of pairs of images and text captions from a specified folder and return them as a list. Supported formats: PNG, JPG, JPEG, WEBP.", is_experimental=True, inputs=[ io.Combo.Input( "folder", options=folder_paths.get_input_subfolders(), - tooltip="The folder to load images from.", + tooltip="The folder to load images and text captions from.", ) ], outputs=[ @@ -206,8 +210,10 @@ class SaveImageDataSetToFolderNode(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="SaveImageDataSetToFolder", - display_name="Save Image Dataset to Folder", - category="dataset", + search_aliases=["save folder", "save to folder", "save dataset", "save images", "export dataset"], + display_name="Save Image (to Folder) (DEPRECATED)", + category="image", + description="Save a dataset of images to a specified folder. Supported formats: PNG.", is_experimental=True, is_output_node=True, is_input_list=True, # Receive images as list @@ -226,6 +232,7 @@ class SaveImageDataSetToFolderNode(io.ComfyNode): ), ], outputs=[], + is_deprecated=True, # This node is redundant and superseded by existing Save Image nodes where the target folder can be specified in the filename_prefix ) @classmethod @@ -246,14 +253,20 @@ class SaveImageTextDataSetToFolderNode(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="SaveImageTextDataSetToFolder", - display_name="Save Image and Text Dataset to Folder", - category="dataset", + search_aliases=["save folder", "save to folder", "save dataset", "save images", "save text", "export dataset"], + display_name="Save Image-Text (to Folder)", + category="image", + description="Save a dataset of pairs of images and text captions to a specified folder. Images are saved as PNG files and captions are saved as TXT files with the same filename_prefix.", is_experimental=True, is_output_node=True, is_input_list=True, # Receive both images and texts as lists inputs=[ io.Image.Input("images", tooltip="List of images to save."), - io.String.Input("texts", tooltip="List of text captions to save."), + io.String.Input("texts", + optional=True, + force_input=True, + tooltip="List of text captions to save." + ), io.String.Input( "folder_name", default="dataset", @@ -270,7 +283,7 @@ class SaveImageTextDataSetToFolderNode(io.ComfyNode): ) @classmethod - def execute(cls, images, texts, folder_name, filename_prefix): + def execute(cls, images, folder_name, filename_prefix, texts=None): # Extract scalar values folder_name = folder_name[0] filename_prefix = filename_prefix[0] @@ -279,11 +292,12 @@ class SaveImageTextDataSetToFolderNode(io.ComfyNode): saved_files = save_images_to_folder(images, output_dir, filename_prefix) # Save captions - for idx, (filename, caption) in enumerate(zip(saved_files, texts)): - caption_filename = filename.replace(".png", ".txt") - caption_path = os.path.join(output_dir, caption_filename) - with open(caption_path, "w", encoding="utf-8") as f: - f.write(caption) + if texts: + for idx, (filename, caption) in enumerate(zip(saved_files, texts)): + caption_filename = filename.replace(".png", ".txt") + caption_path = os.path.join(output_dir, caption_filename) + with open(caption_path, "w", encoding="utf-8") as f: + f.write(caption) logging.info(f"Saved {len(saved_files)} images and captions to {output_dir}.") return io.NodeOutput() @@ -314,11 +328,13 @@ class ImageProcessingNode(io.ComfyNode): Child classes should set: node_id: Unique node identifier (required) + search_aliases: List of search aliases (optional) display_name: Display name (optional, defaults to node_id) description: Node description (optional) extra_inputs: List of additional io.Input objects beyond "images" (optional) is_group_process: None (auto-detect), True (group), or False (individual) (optional) is_output_list: True (list output) or False (single output) (optional, default True) + is_deprecated: True if the node is deprecated (optional, default False) Child classes must implement ONE of: _process(cls, image, **kwargs) -> tensor (for single-item processing) @@ -326,12 +342,13 @@ class ImageProcessingNode(io.ComfyNode): """ node_id = None + search_aliases = [] display_name = None description = None extra_inputs = [] is_group_process = None # None = auto-detect, True/False = explicit is_output_list = None # None = auto-detect based on processing mode - + is_deprecated = False @classmethod def _detect_processing_mode(cls): """Detect whether this node uses group or individual processing. @@ -402,8 +419,10 @@ class ImageProcessingNode(io.ComfyNode): return io.Schema( node_id=cls.node_id, + search_aliases=cls.search_aliases, display_name=cls.display_name or cls.node_id, - category="dataset/image", + category=cls.category, + description=cls.description, is_experimental=True, is_input_list=is_group, # True for group, False for individual inputs=inputs, @@ -472,11 +491,13 @@ class TextProcessingNode(io.ComfyNode): Child classes should set: node_id: Unique node identifier (required) + search_aliases: List of search aliases (optional) display_name: Display name (optional, defaults to node_id) description: Node description (optional) extra_inputs: List of additional io.Input objects beyond "texts" (optional) is_group_process: None (auto-detect), True (group), or False (individual) (optional) is_output_list: True (list output) or False (single output) (optional, default True) + is_deprecated: True if the node is deprecated (optional, default False) Child classes must implement ONE of: _process(cls, text, **kwargs) -> str (for single-item processing) @@ -484,12 +505,13 @@ class TextProcessingNode(io.ComfyNode): """ node_id = None + search_aliases = [] display_name = None description = None extra_inputs = [] is_group_process = None # None = auto-detect, True/False = explicit is_output_list = None # None = auto-detect based on processing mode - + is_deprecated = False @classmethod def _detect_processing_mode(cls): """Detect whether this node uses group or individual processing. @@ -627,15 +649,17 @@ class TextProcessingNode(io.ComfyNode): class ResizeImagesByShorterEdgeNode(ImageProcessingNode): node_id = "ResizeImagesByShorterEdge" - display_name = "Resize Images by Shorter Edge" - description = "Resize images so that the shorter edge matches the specified length while preserving aspect ratio." + display_name = "Resize Images by Shorter Edge (DEPRECATED)" + category = "image/transform" + description = "Resize images so that the shorter edge matches the specified dimension while preserving aspect ratio." + is_deprecated = True # This node is superseded by Resize Image/Mask with resize_type = scale shorter dimension extra_inputs = [ io.Int.Input( "shorter_edge", default=512, min=1, max=8192, - tooltip="Target length for the shorter edge.", + tooltip="Target dimension for the shorter edge.", ), ] @@ -655,15 +679,17 @@ class ResizeImagesByShorterEdgeNode(ImageProcessingNode): class ResizeImagesByLongerEdgeNode(ImageProcessingNode): node_id = "ResizeImagesByLongerEdge" - display_name = "Resize Images by Longer Edge" - description = "Resize images so that the longer edge matches the specified length while preserving aspect ratio." + display_name = "Resize Images by Longer Edge (DEPRECATED)" + category = "image/transform" + description = "Resize images so that the longer edge matches the specified dimension while preserving aspect ratio." + is_deprecated = True # This node is superseded by Resize Image/Mask with resize_type = scale longer dimension extra_inputs = [ io.Int.Input( "longer_edge", default=1024, min=1, max=8192, - tooltip="Target length for the longer edge.", + tooltip="Target dimension for the longer edge.", ), ] @@ -686,8 +712,10 @@ class ResizeImagesByLongerEdgeNode(ImageProcessingNode): class CenterCropImagesNode(ImageProcessingNode): node_id = "CenterCropImages" - display_name = "Center Crop Images" - description = "Center crop all images to the specified dimensions." + search_aliases=["crop", "cut", "trim"] + display_name="Crop Image (Center)" + category="image/transform" + description = "Center crop an image to the specified dimensions." extra_inputs = [ io.Int.Input("width", default=512, min=1, max=8192, tooltip="Crop width."), io.Int.Input("height", default=512, min=1, max=8192, tooltip="Crop height."), @@ -706,10 +734,11 @@ class CenterCropImagesNode(ImageProcessingNode): class RandomCropImagesNode(ImageProcessingNode): node_id = "RandomCropImages" - display_name = "Random Crop Images" - description = ( - "Randomly crop all images to the specified dimensions (for data augmentation)." - ) + search_aliases=["crop", "cut", "trim"] + display_name = "Crop Image (Random)" + category="image/transform" + description = "Randomly crop an image to the specified dimensions." + extra_inputs = [ io.Int.Input("width", default=512, min=1, max=8192, tooltip="Crop width."), io.Int.Input("height", default=512, min=1, max=8192, tooltip="Crop height."), @@ -734,7 +763,9 @@ class RandomCropImagesNode(ImageProcessingNode): class NormalizeImagesNode(ImageProcessingNode): node_id = "NormalizeImages" - display_name = "Normalize Images" + search_aliases=["normalize", "normalize colors"] + display_name = "Normalize Image Colors" + category = "image/color" description = "Normalize images using mean and standard deviation." extra_inputs = [ io.Float.Input( @@ -762,8 +793,10 @@ class NormalizeImagesNode(ImageProcessingNode): class AdjustBrightnessNode(ImageProcessingNode): node_id = "AdjustBrightness" + search_aliases=["brightness"] display_name = "Adjust Brightness" - description = "Adjust brightness of all images." + category="image/adjustments" + description = "Adjust the brightness of an image." extra_inputs = [ io.Float.Input( "factor", @@ -781,8 +814,10 @@ class AdjustBrightnessNode(ImageProcessingNode): class AdjustContrastNode(ImageProcessingNode): node_id = "AdjustContrast" + search_aliases=["contrast"] display_name = "Adjust Contrast" - description = "Adjust contrast of all images." + category="image/adjustments" + description = "Adjust the contrast of an image." extra_inputs = [ io.Float.Input( "factor", @@ -800,8 +835,10 @@ class AdjustContrastNode(ImageProcessingNode): class ShuffleDatasetNode(ImageProcessingNode): node_id = "ShuffleDataset" - display_name = "Shuffle Image Dataset" - description = "Randomly shuffle the order of images in the dataset." + search_aliases=["shuffle", "randomize", "mix"] + display_name = "Shuffle Images List" + category = "image/batch" + description = "Randomly shuffle the order of images in a list." is_group_process = True # Requires full list to shuffle extra_inputs = [ io.Int.Input( @@ -823,13 +860,15 @@ class ShuffleImageTextDatasetNode(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="ShuffleImageTextDataset", - display_name="Shuffle Image-Text Dataset", - category="dataset/image", + search_aliases=["shuffle", "randomize", "mix"], + display_name = "Shuffle Pairs of Image-Text", + category = "image/batch", + description = "Randomly shuffle the order of pairs of image-text in a list.", is_experimental=True, is_input_list=True, inputs=[ io.Image.Input("images", tooltip="List of images to shuffle."), - io.String.Input("texts", tooltip="List of texts to shuffle."), + io.String.Input("texts", tooltip="List of texts to shuffle.", force_input=True), io.Int.Input( "seed", default=0, @@ -865,8 +904,11 @@ class ShuffleImageTextDatasetNode(io.ComfyNode): class TextToLowercaseNode(TextProcessingNode): node_id = "TextToLowercase" - display_name = "Text to Lowercase" - description = "Convert all texts to lowercase." + search_aliases=["lowercase"] + display_name = "Convert Text to Lowercase (DEPRECATED)" + category = "text" + description = "Convert text to lowercase." + is_deprecated = True # This node is superseded by the Convert Text Case node @classmethod def _process(cls, text): @@ -875,8 +917,11 @@ class TextToLowercaseNode(TextProcessingNode): class TextToUppercaseNode(TextProcessingNode): node_id = "TextToUppercase" - display_name = "Text to Uppercase" - description = "Convert all texts to uppercase." + search_aliases=["uppercase"] + display_name = "Convert Text to Uppercase (DEPRECATED)" + category = "text" + description = "Convert text to uppercase." + is_deprecated = True # This node is superseded by the Convert Text Case node @classmethod def _process(cls, text): @@ -885,8 +930,10 @@ class TextToUppercaseNode(TextProcessingNode): class TruncateTextNode(TextProcessingNode): node_id = "TruncateText" + search_aliases=["truncate", "cut", "shorten"] display_name = "Truncate Text" - description = "Truncate all texts to a maximum length." + category = "text" + description = "Truncate text to a maximum length." extra_inputs = [ io.Int.Input( "max_length", default=77, min=1, max=10000, tooltip="Maximum text length." @@ -900,8 +947,10 @@ class TruncateTextNode(TextProcessingNode): class AddTextPrefixNode(TextProcessingNode): node_id = "AddTextPrefix" - display_name = "Add Text Prefix" + display_name = "Add Text Prefix (DEPRECATED)" + category = "text" description = "Add a prefix to all texts." + is_deprecated = True # This node is superseded by the Concatenate Text node extra_inputs = [ io.String.Input("prefix", default="", tooltip="Prefix to add."), ] @@ -913,8 +962,10 @@ class AddTextPrefixNode(TextProcessingNode): class AddTextSuffixNode(TextProcessingNode): node_id = "AddTextSuffix" - display_name = "Add Text Suffix" + display_name = "Add Text Suffix (DEPRECATED)" + category = "text" description = "Add a suffix to all texts." + is_deprecated = True # This node is superseded by the Concatenate Text node extra_inputs = [ io.String.Input("suffix", default="", tooltip="Suffix to add."), ] @@ -926,8 +977,10 @@ class AddTextSuffixNode(TextProcessingNode): class ReplaceTextNode(TextProcessingNode): node_id = "ReplaceText" - display_name = "Replace Text" + display_name = "Replace Text (DEPRECATED)" + category = "text" description = "Replace text in all texts." + is_deprecated = True # This node is superseded by the other Replace Text node extra_inputs = [ io.String.Input("find", default="", tooltip="Text to find."), io.String.Input("replace", default="", tooltip="Text to replace with."), @@ -940,8 +993,10 @@ class ReplaceTextNode(TextProcessingNode): class StripWhitespaceNode(TextProcessingNode): node_id = "StripWhitespace" - display_name = "Strip Whitespace" + display_name = "Strip Whitespace (DEPRECATED)" + category = "text" description = "Strip leading and trailing whitespace from all texts." + is_deprecated = True # This node is superseded by the Trim Text node @classmethod def _process(cls, text): @@ -952,11 +1007,13 @@ class StripWhitespaceNode(TextProcessingNode): class ImageDeduplicationNode(ImageProcessingNode): - """Remove duplicate or very similar images from the dataset using perceptual hashing.""" + """Remove duplicate or very similar images from a list using perceptual hashing.""" node_id = "ImageDeduplication" - display_name = "Image Deduplication" - description = "Remove duplicate or very similar images from the dataset." + search_aliases=["deduplicate", "remove duplicates", "similarity filter"] + display_name = "Deduplicate Images" + category = "image/batch" + description = "Remove duplicate or very similar images from a list." is_group_process = True # Requires full list to compare images extra_inputs = [ io.Float.Input( @@ -1026,7 +1083,9 @@ class ImageGridNode(ImageProcessingNode): """Combine multiple images into a single grid/collage.""" node_id = "ImageGrid" - display_name = "Image Grid" + search_aliases=["grid", "collage", "combine"] + display_name = "Make Image Grid" + category="image/batch" description = "Arrange multiple images into a grid layout." is_group_process = True # Requires full list to create grid is_output_list = False # Outputs single grid image @@ -1102,9 +1161,12 @@ class MergeImageListsNode(ImageProcessingNode): """Merge multiple image lists into a single list.""" node_id = "MergeImageLists" - display_name = "Merge Image Lists" + search_aliases=["list", "merge list", "make list"] + display_name = "Merge Image Lists (DEPRECATED)" + category = "image/batch" description = "Concatenate multiple image lists into one." is_group_process = True # Receives images as list + is_deprecated = True # This node is superseded by the Create List node @classmethod def _group_process(cls, images): @@ -1119,9 +1181,11 @@ class MergeTextListsNode(TextProcessingNode): """Merge multiple text lists into a single list.""" node_id = "MergeTextLists" - display_name = "Merge Text Lists" + display_name = "Merge Text Lists (DEPRECATED)" + category = "text" description = "Concatenate multiple text lists into one." is_group_process = True # Receives texts as list + is_deprecated = True # This node is superseded by the Create List node @classmethod def _group_process(cls, texts): @@ -1142,8 +1206,10 @@ class ResolutionBucket(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="ResolutionBucket", + search_aliases=["bucket by resolution", "group by resolution", "batch by resolution"], display_name="Resolution Bucket", - category="dataset", + category="training", + description="Group latents and conditionings into buckets", is_experimental=True, is_input_list=True, inputs=[ @@ -1236,7 +1302,8 @@ class MakeTrainingDataset(io.ComfyNode): node_id="MakeTrainingDataset", search_aliases=["encode dataset"], display_name="Make Training Dataset", - category="dataset", + category="training", + description="Encode images with VAE and texts with CLIP to create a training dataset of latents and conditionings.", is_experimental=True, is_input_list=True, # images and texts as lists inputs=[ @@ -1251,6 +1318,7 @@ class MakeTrainingDataset(io.ComfyNode): "texts", optional=True, tooltip="List of text captions. Can be length n (matching images), 1 (repeated for all), or omitted (uses empty string).", + force_input=True ), ], outputs=[ @@ -1320,9 +1388,10 @@ class SaveTrainingDataset(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="SaveTrainingDataset", - search_aliases=["export training data"], + search_aliases=["export dataset", "save dataset"], display_name="Save Training Dataset", - category="dataset", + category="training", + description="Save encoded training dataset (latents + conditioning) to disk for efficient loading during training.", is_experimental=True, is_output_node=True, is_input_list=True, # Receive lists @@ -1424,7 +1493,8 @@ class LoadTrainingDataset(io.ComfyNode): node_id="LoadTrainingDataset", search_aliases=["import dataset", "training data"], display_name="Load Training Dataset", - category="dataset", + category="training", + description="Load encoded training dataset (latents + conditioning) from disk for use in training.", is_experimental=True, inputs=[ io.String.Input( diff --git a/comfy_extras/nodes_hunyuan3d.py b/comfy_extras/nodes_hunyuan3d.py index 403eb855b..bcd3f9198 100644 --- a/comfy_extras/nodes_hunyuan3d.py +++ b/comfy_extras/nodes_hunyuan3d.py @@ -419,15 +419,17 @@ class VoxelToMeshBasic(IO.ComfyNode): def define_schema(cls): return IO.Schema( node_id="VoxelToMeshBasic", - display_name="Voxel to Mesh (Basic)", + display_name="Voxel to Mesh (Basic) (DEPRECATED)", category="3d", + description="Converts a voxel grid to a mesh.", + is_deprecated=True, # This node is superseded by the Voxel To Mesh node inputs=[ IO.Voxel.Input("voxel"), IO.Float.Input("threshold", default=0.6, min=-1.0, max=1.0, step=0.01), ], outputs=[ IO.Mesh.Output(), - ] + ], ) @classmethod @@ -453,9 +455,10 @@ class VoxelToMesh(IO.ComfyNode): node_id="VoxelToMesh", display_name="Voxel to Mesh", category="3d", + description="Converts a voxel grid to a mesh.", inputs=[ IO.Voxel.Input("voxel"), - IO.Combo.Input("algorithm", options=["surface net", "basic"], advanced=True), + IO.Combo.Input("algorithm", options=["surface net", "basic"]), IO.Float.Input("threshold", default=0.6, min=-1.0, max=1.0, step=0.01), ], outputs=[ diff --git a/comfy_extras/nodes_images.py b/comfy_extras/nodes_images.py index 6326c5be8..4856346d7 100644 --- a/comfy_extras/nodes_images.py +++ b/comfy_extras/nodes_images.py @@ -55,9 +55,10 @@ class ImageCropV2(IO.ComfyNode): def define_schema(cls): return IO.Schema( node_id="ImageCropV2", - search_aliases=["trim"], + search_aliases=["crop", "cut", "trim"], display_name="Crop Image", category="image/transform", + description = "Crop an image to the specified dimensions.", essentials_category="Image Tools", has_intermediate_output=True, inputs=[ diff --git a/comfy_extras/nodes_lt_audio.py b/comfy_extras/nodes_lt_audio.py index 2c1f63afb..51ddf584a 100644 --- a/comfy_extras/nodes_lt_audio.py +++ b/comfy_extras/nodes_lt_audio.py @@ -11,8 +11,8 @@ class LTXVAudioVAELoader(io.ComfyNode): def define_schema(cls) -> io.Schema: return io.Schema( node_id="LTXVAudioVAELoader", - display_name="LTXV Audio VAE Loader", - category="audio", + display_name="Load LTXV Audio VAE", + category="loaders", inputs=[ io.Combo.Input( "ckpt_name", @@ -40,7 +40,7 @@ class LTXVAudioVAEEncode(VAEEncodeAudio): return io.Schema( node_id="LTXVAudioVAEEncode", display_name="LTXV Audio VAE Encode", - category="audio", + category="latent/audio", inputs=[ io.Audio.Input("audio", tooltip="The audio to be encoded."), io.Vae.Input( @@ -63,7 +63,7 @@ class LTXVAudioVAEDecode(io.ComfyNode): return io.Schema( node_id="LTXVAudioVAEDecode", display_name="LTXV Audio VAE Decode", - category="audio", + category="latent/audio", inputs=[ io.Latent.Input("samples", tooltip="The latent to be decoded."), io.Vae.Input( From 4259a0c7c3b805e3dd1f178e603e6d725780583a Mon Sep 17 00:00:00 2001 From: Alexis Rolland Date: Thu, 21 May 2026 16:50:09 +0800 Subject: [PATCH 3/8] Update MoGe nodes display names, search aliases and descriptions (#14030) --- comfy_extras/nodes_moge.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/comfy_extras/nodes_moge.py b/comfy_extras/nodes_moge.py index d9a08ebc7..3508781a0 100644 --- a/comfy_extras/nodes_moge.py +++ b/comfy_extras/nodes_moge.py @@ -103,8 +103,10 @@ class MoGePanoramaInference(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="MoGePanoramaInference", - display_name="MoGe Panorama Inference", + search_aliases=["moge", "panorama", "depth", "geometry", "depth estimation", "geometry estimation"], + display_name="Run MoGe Panorama Inference", category="image/geometry_estimation", + description="Run MoGe on an equirectangular panorama by splitting it into 12 perspective views, running inference on each, and merging the results into a single depth map.", inputs=[ MoGeModelType.Input("moge_model"), io.Image.Input("image", tooltip="Equirectangular panorama (any aspect)."), @@ -222,7 +224,9 @@ class MoGeInference(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="MoGeInference", - display_name="MoGe Inference", + search_aliases=["moge", "depth", "geometry", "depth estimation", "geometry estimation"], + display_name="Run MoGe Inference", + description="Run MoGe on a single image to estimate depth and geometry.", category="image/geometry_estimation", inputs=[ MoGeModelType.Input("moge_model"), @@ -277,7 +281,9 @@ class MoGeRender(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="MoGeRender", - display_name="MoGe Render", + search_aliases=["moge", "render", "geometry", "depth", "normal"], + display_name="Render MoGe Geometry", + description="Render a depth map or normal map from geometry data", category="image/geometry_estimation", inputs=[ MoGeGeometry.Input("moge_geometry"), @@ -342,7 +348,9 @@ class MoGePointMapToMesh(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="MoGePointMapToMesh", - display_name="MoGe Point Map to Mesh", + search_aliases=["moge", "mesh", "geometry", "point map"], + display_name="Convert MoGe Point Map to Mesh", + description="Convert a MoGe point map into a 3D mesh.", category="image/geometry_estimation", inputs=[ MoGeGeometry.Input("moge_geometry"), From aab41a9ddb3cb586024a75141fcc2f5e838da12c Mon Sep 17 00:00:00 2001 From: Edoardo Carmignani Date: Thu, 21 May 2026 17:47:20 +0200 Subject: [PATCH 4/8] fix(lanczos): correct dimension transposition for single-channel tensors (#12679) --- comfy/utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/comfy/utils.py b/comfy/utils.py index 00e382fac..31052714a 100644 --- a/comfy/utils.py +++ b/comfy/utils.py @@ -1019,10 +1019,11 @@ def bislerp(samples, width, height): def lanczos(samples, width, height): #the below API is strict and expects grayscale to be squeezed - samples = samples.squeeze(1) if samples.shape[1] == 1 else samples.movedim(1, -1) + if samples.ndim == 4: + samples = samples.squeeze(1) if samples.shape[1] == 1 else samples.movedim(1, -1) images = [Image.fromarray(np.clip(255. * image.cpu().numpy(), 0, 255).astype(np.uint8)) for image in samples] images = [image.resize((width, height), resample=Image.Resampling.LANCZOS) for image in images] - images = [torch.from_numpy(np.array(image).astype(np.float32) / 255.0).movedim(-1, 0) for image in images] + images = [torch.from_numpy(t).movedim(-1, 0) if (t := np.array(image).astype(np.float32) / 255.0).ndim == 3 else torch.from_numpy(t) for image in images] result = torch.stack(images) return result.to(samples.device, samples.dtype) From 03e511862ee783fec84ef14fe306ee30d4240e2c Mon Sep 17 00:00:00 2001 From: rattus <46076784+rattus128@users.noreply.github.com> Date: Fri, 22 May 2026 02:47:16 +1000 Subject: [PATCH 5/8] Fix reshaping lora application (#14031) * ModelPatcherDyanmic: purge stale vbar allocs on force cast * ModelPatcherDynamic: restore backups before load If doing a clean reload, mutative changes (lora application) could be applied on-top of the already loaded weight. Restore from backup unconditionally so that the new load is clean. --- comfy/model_patcher.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py index c8ed02e70..b44b99e4a 100644 --- a/comfy/model_patcher.py +++ b/comfy/model_patcher.py @@ -1613,6 +1613,16 @@ class ModelPatcherDynamic(ModelPatcher): #use all ModelPatcherDynamic this is ignored and its all done dynamically. return super().memory_required(input_shape=input_shape) * 1.3 + (1024 ** 3) + def restore_loaded_backups(self): + restored = self.model.model_loaded_weight_memory + for key in list(self.backup.keys()): + bk = self.backup.pop(key) + comfy.utils.set_attr_param(self.model, key, bk.weight) + for key in list(self.backup_buffers.keys()): + comfy.utils.set_attr_buffer(self.model, key, self.backup_buffers.pop(key)) + self.model.model_loaded_weight_memory = 0 + return restored + def load(self, device_to=None, lowvram_model_memory=0, force_patch_weights=False, full_load=False, dirty=False): @@ -1629,7 +1639,7 @@ class ModelPatcherDynamic(ModelPatcher): num_patches = 0 allocated_size = 0 - self.model.model_loaded_weight_memory = 0 + self.restore_loaded_backups() with self.use_ejected(): self.unpatch_hooks() @@ -1716,6 +1726,9 @@ class ModelPatcherDynamic(ModelPatcher): force_load=True if force_load: + if hasattr(m, "_v"): + comfy_aimdo.model_vbar.vbar_unpin(m._v) + delattr(m, "_v") force_load_param(self, "weight", device_to) force_load_param(self, "bias", device_to) else: @@ -1773,13 +1786,7 @@ class ModelPatcherDynamic(ModelPatcher): freed = 0 if vbar is None else vbar.free_memory(memory_to_free) if freed < memory_to_free: - for key in list(self.backup.keys()): - bk = self.backup.pop(key) - comfy.utils.set_attr_param(self.model, key, bk.weight) - for key in list(self.backup_buffers.keys()): - comfy.utils.set_attr_buffer(self.model, key, self.backup_buffers.pop(key)) - freed += self.model.model_loaded_weight_memory - self.model.model_loaded_weight_memory = 0 + freed += self.restore_loaded_backups() return freed From 6ecf5eca7ac6e5a78af96650c2da33ab8c44bb40 Mon Sep 17 00:00:00 2001 From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com> Date: Thu, 21 May 2026 21:36:11 +0300 Subject: [PATCH 6/8] [Partner Nodes] add OpenRouter LLM node (#14007) * [Partner Nodes] add reasoning widget to Anthropic node Signed-off-by: bigcat88 * [Partner Nodes] add new OpenRouterLLM node Signed-off-by: bigcat88 * [Partner Nodes] fix passing images to Grok LLM Signed-off-by: bigcat88 --------- Signed-off-by: bigcat88 --- comfy_api_nodes/apis/anthropic.py | 25 +- comfy_api_nodes/apis/openrouter.py | 93 +++++++ comfy_api_nodes/nodes_anthropic.py | 83 +++++- comfy_api_nodes/nodes_openrouter.py | 374 ++++++++++++++++++++++++++++ 4 files changed, 563 insertions(+), 12 deletions(-) create mode 100644 comfy_api_nodes/apis/openrouter.py create mode 100644 comfy_api_nodes/nodes_openrouter.py diff --git a/comfy_api_nodes/apis/anthropic.py b/comfy_api_nodes/apis/anthropic.py index 6cac537ea..46a5bb428 100644 --- a/comfy_api_nodes/apis/anthropic.py +++ b/comfy_api_nodes/apis/anthropic.py @@ -35,6 +35,19 @@ class AnthropicMessage(BaseModel): content: list[AnthropicTextContent | AnthropicImageContent] = Field(...) +class AnthropicThinkingConfig(BaseModel): + type: Literal["enabled", "disabled", "adaptive"] = Field(...) + budget_tokens: int | None = Field( + None, ge=1024, + description="Reasoning budget in tokens. Used when type is 'enabled'. Must be less than max_tokens.", + ) + + +class AnthropicOutputConfig(BaseModel): + """Used with `thinking.type='adaptive'` on models like Opus 4.7.""" + effort: Literal["low", "medium", "high"] | None = Field(None) + + class AnthropicMessagesRequest(BaseModel): model: str = Field(...) messages: list[AnthropicMessage] = Field(...) @@ -44,6 +57,8 @@ class AnthropicMessagesRequest(BaseModel): top_p: float | None = Field(None, ge=0.0, le=1.0) top_k: int | None = Field(None, ge=0) stop_sequences: list[str] | None = Field(None) + thinking: AnthropicThinkingConfig | None = Field(None) + output_config: AnthropicOutputConfig | None = Field(None) class AnthropicResponseTextBlock(BaseModel): @@ -51,6 +66,14 @@ class AnthropicResponseTextBlock(BaseModel): text: str = Field(...) +class AnthropicResponseThinkingBlock(BaseModel): + type: Literal["thinking"] = "thinking" + thinking: str = Field(...) + + +AnthropicResponseBlock = AnthropicResponseTextBlock | AnthropicResponseThinkingBlock + + class AnthropicCacheCreationUsage(BaseModel): ephemeral_5m_input_tokens: int | None = Field(None) ephemeral_1h_input_tokens: int | None = Field(None) @@ -69,7 +92,7 @@ class AnthropicMessagesResponse(BaseModel): type: str | None = Field(None) role: str | None = Field(None) model: str | None = Field(None) - content: list[AnthropicResponseTextBlock] | None = Field(None) + content: list[AnthropicResponseBlock] | None = Field(None) stop_reason: str | None = Field(None) stop_sequence: str | None = Field(None) usage: AnthropicMessagesUsage | None = Field(None) diff --git a/comfy_api_nodes/apis/openrouter.py b/comfy_api_nodes/apis/openrouter.py new file mode 100644 index 000000000..e30d9bcfb --- /dev/null +++ b/comfy_api_nodes/apis/openrouter.py @@ -0,0 +1,93 @@ +"""Pydantic models for the OpenRouter chat completions API. + +See: https://openrouter.ai/docs/api/api-reference/chat/send-chat-completion-request +""" + +from typing import Literal + +from pydantic import BaseModel, Field + + +class OpenRouterTextContent(BaseModel): + type: Literal["text"] = "text" + text: str = Field(...) + + +class OpenRouterImageUrl(BaseModel): + url: str = Field(...) + + +class OpenRouterImageContent(BaseModel): + type: Literal["image_url"] = "image_url" + image_url: OpenRouterImageUrl = Field(...) + + +class OpenRouterVideoUrl(BaseModel): + url: str = Field(...) + + +class OpenRouterVideoContent(BaseModel): + type: Literal["video_url"] = "video_url" + video_url: OpenRouterVideoUrl = Field(...) + + +OpenRouterContentBlock = OpenRouterTextContent | OpenRouterImageContent | OpenRouterVideoContent + + +class OpenRouterMessage(BaseModel): + role: Literal["system", "user", "assistant"] = Field(...) + content: str | list[OpenRouterContentBlock] = Field(...) + + +class OpenRouterReasoningConfig(BaseModel): + effort: str | None = Field(None) + exclude: bool | None = Field(None, description="If true, model reasons but reasoning is excluded from response.") + + +class OpenRouterWebSearchOptions(BaseModel): + search_context_size: str | None = Field(None) + + +class OpenRouterChatRequest(BaseModel): + model: str = Field(...) + messages: list[OpenRouterMessage] = Field(...) + seed: int | None = Field(None) + reasoning: OpenRouterReasoningConfig | None = Field(None) + web_search_options: OpenRouterWebSearchOptions | None = Field(None) + stream: bool = Field(False) + + +class OpenRouterUsage(BaseModel): + prompt_tokens: int | None = Field(None) + completion_tokens: int | None = Field(None) + total_tokens: int | None = Field(None) + cost: float | None = Field(None, description="Server-side authoritative USD cost of the call.") + + +class OpenRouterResponseMessage(BaseModel): + role: str | None = Field(None) + content: str | None = Field(None) + reasoning: str | None = Field(None) + refusal: str | None = Field(None) + + +class OpenRouterChoice(BaseModel): + index: int | None = Field(None) + message: OpenRouterResponseMessage | None = Field(None) + finish_reason: str | None = Field(None) + + +class OpenRouterError(BaseModel): + code: int | str | None = Field(None) + message: str | None = Field(None) + metadata: dict | None = Field(None) + + +class OpenRouterChatResponse(BaseModel): + id: str | None = Field(None) + model: str | None = Field(None) + object: str | None = Field(None) + provider: str | None = Field(None) + choices: list[OpenRouterChoice] | None = Field(None) + usage: OpenRouterUsage | None = Field(None) + error: OpenRouterError | None = Field(None) diff --git a/comfy_api_nodes/nodes_anthropic.py b/comfy_api_nodes/nodes_anthropic.py index 28dd70d4e..42ec5708f 100644 --- a/comfy_api_nodes/nodes_anthropic.py +++ b/comfy_api_nodes/nodes_anthropic.py @@ -9,8 +9,11 @@ from comfy_api_nodes.apis.anthropic import ( AnthropicMessage, AnthropicMessagesRequest, AnthropicMessagesResponse, + AnthropicOutputConfig, + AnthropicResponseTextBlock, AnthropicRole, AnthropicTextContent, + AnthropicThinkingConfig, ) from comfy_api_nodes.util import ( ApiEndpoint, @@ -32,15 +35,29 @@ CLAUDE_MODELS: dict[str, str] = { "Haiku 4.5": "claude-haiku-4-5-20251001", } +_THINKING_UNSUPPORTED = {"Haiku 4.5"} +# Models that use the newer "adaptive" thinking mode (Opus 4.7 requires it; older models keep the explicit budget API). +# Anthropic decides the actual budget when adaptive is used, based on the `output_config.effort` hint. +_ADAPTIVE_THINKING_MODELS = {"Opus 4.7", "Opus 4.6", "Sonnet 4.6"} -def _claude_model_inputs(): - return [ +# Budget mode (Sonnet 4.5): effort -> reasoning budget in tokens. Must be < max_tokens. +# Sized so even the "high" budget fits comfortably under the default max_tokens=32768. +_REASONING_BUDGET: dict[str, int] = { + "low": 2048, + "medium": 8192, + "high": 16384, +} +_REASONING_EFFORTS = ["off", "low", "medium", "high"] + + +def _claude_model_inputs(model_label: str): + inputs: list = [ IO.Int.Input( "max_tokens", - default=16000, - min=32, - max=32000, - tooltip="Maximum number of tokens to generate before stopping.", + default=32768, + min=4096, + max=64000, + tooltip="Maximum number of tokens to generate (includes reasoning tokens when enabled).", advanced=True, ), IO.Float.Input( @@ -49,10 +66,24 @@ def _claude_model_inputs(): min=0.0, max=1.0, step=0.01, - tooltip="Controls randomness. 0.0 is deterministic, 1.0 is most random. Ignored for Opus 4.7.", + tooltip=( + "Controls randomness. 0.0 is deterministic, 1.0 is most random. " + "Ignored for Opus 4.7 and any model when reasoning_effort is set." + ), advanced=True, ), ] + if model_label not in _THINKING_UNSUPPORTED: + inputs.append( + IO.Combo.Input( + "reasoning_effort", + options=_REASONING_EFFORTS, + default="off", + tooltip="Extended thinking effort. 'off' disables reasoning.", + advanced=True, + ) + ) + return inputs def _model_price_per_million(model: str) -> tuple[float, float] | None: @@ -95,7 +126,11 @@ def calculate_tokens_price(response: AnthropicMessagesResponse) -> float | None: def _get_text_from_response(response: AnthropicMessagesResponse) -> str: if not response.content: return "" - return "\n".join(block.text for block in response.content if block.text) + # Thinking blocks are silently dropped — we never want reasoning in the output. + return "\n".join( + block.text for block in response.content + if isinstance(block, AnthropicResponseTextBlock) and block.text + ) async def _build_image_content_blocks( @@ -133,7 +168,10 @@ class ClaudeNode(IO.ComfyNode): ), IO.DynamicCombo.Input( "model", - options=[IO.DynamicCombo.Option(label, _claude_model_inputs()) for label in CLAUDE_MODELS], + options=[ + IO.DynamicCombo.Option(label, _claude_model_inputs(label)) + for label in CLAUDE_MODELS + ], tooltip="The Claude model used to generate the response.", ), IO.Int.Input( @@ -207,8 +245,29 @@ class ClaudeNode(IO.ComfyNode): ) -> IO.NodeOutput: validate_string(prompt, strip_whitespace=True, min_length=1) model_label = model["model"] - max_tokens = model["max_tokens"] - temperature = None if model_label == "Opus 4.7" else model["temperature"] + max_tokens = model.get("max_tokens", 32768) + reasoning_effort = model.get("reasoning_effort", "off") + thinking_enabled = reasoning_effort not in ("off", None) and model_label not in _THINKING_UNSUPPORTED + + # Anthropic requires temperature to be unset (defaults to 1.0) when thinking is enabled. + # Opus 4.7 also rejects user-supplied temperature. + if thinking_enabled or model_label == "Opus 4.7": + temperature = None + else: + temperature = model.get("temperature", 1.0) + + thinking_cfg: AnthropicThinkingConfig | None = None + output_cfg: AnthropicOutputConfig | None = None + if thinking_enabled: + if model_label in _ADAPTIVE_THINKING_MODELS: + # Adaptive mode - Anthropic chooses the budget based on effort hint + thinking_cfg = AnthropicThinkingConfig(type="adaptive") + output_cfg = AnthropicOutputConfig(effort=reasoning_effort) + else: + # Budget mode (Sonnet 4.5). Leave at least 1024 tokens for the actual response + budget = _REASONING_BUDGET[reasoning_effort] + budget = min(budget, max(1024, max_tokens - 1024)) + thinking_cfg = AnthropicThinkingConfig(type="enabled", budget_tokens=budget) image_tensors: list[Input.Image] = [t for t in (images or {}).values() if t is not None] if sum(get_number_of_images(t) for t in image_tensors) > CLAUDE_MAX_IMAGES: @@ -229,6 +288,8 @@ class ClaudeNode(IO.ComfyNode): messages=[AnthropicMessage(role=AnthropicRole.user, content=content)], system=system_prompt or None, temperature=temperature, + thinking=thinking_cfg, + output_config=output_cfg, ), price_extractor=calculate_tokens_price, ) diff --git a/comfy_api_nodes/nodes_openrouter.py b/comfy_api_nodes/nodes_openrouter.py new file mode 100644 index 000000000..031301870 --- /dev/null +++ b/comfy_api_nodes/nodes_openrouter.py @@ -0,0 +1,374 @@ +"""API Nodes for OpenRouter LLM chat completions.""" + +from dataclasses import dataclass +from typing import Literal + +from typing_extensions import override + +from comfy_api.latest import IO, ComfyExtension, Input +from comfy_api_nodes.apis.openrouter import ( + OpenRouterChatRequest, + OpenRouterChatResponse, + OpenRouterContentBlock, + OpenRouterImageContent, + OpenRouterImageUrl, + OpenRouterMessage, + OpenRouterReasoningConfig, + OpenRouterTextContent, + OpenRouterVideoContent, + OpenRouterVideoUrl, + OpenRouterWebSearchOptions, +) +from comfy_api_nodes.util import ( + ApiEndpoint, + get_number_of_images, + sync_op, + upload_images_to_comfyapi, + upload_video_to_comfyapi, + validate_string, +) + +OPENROUTER_CHAT_ENDPOINT = "/proxy/openrouter/api/v1/chat/completions" + + +Profile = Literal["standard", "reasoning", "frontier_reasoning", "perplexity", "perplexity_reasoning"] + + +@dataclass(frozen=True) +class _ModelSpec: + slug: str # exact OpenRouter model id + profile: Profile + price_in: float # USD per token (prompt) + price_out: float # USD per token (completion) + max_images: int = 0 # 0 = no image input; otherwise max URL-passed images supported + max_videos: int = 0 # 0 = no video input; otherwise max URL-passed videos supported + + +MODELS: list[_ModelSpec] = [ + _ModelSpec("anthropic/claude-opus-4.7", "frontier_reasoning", 0.000005, 0.000025, max_images=20), + _ModelSpec("openai/gpt-5.5-pro", "frontier_reasoning", 0.00003, 0.00018, max_images=20), + _ModelSpec("openai/gpt-5.5", "frontier_reasoning", 0.000005, 0.00003, max_images=20), + _ModelSpec("google/gemini-3.5-flash", "reasoning", 0.0000015, 0.000009, max_images=20, max_videos=4), + _ModelSpec("x-ai/grok-4.20", "reasoning", 0.00000125, 0.0000025, max_images=20), + _ModelSpec("x-ai/grok-4.3", "reasoning", 0.00000125, 0.0000025, max_images=20), + _ModelSpec("deepseek/deepseek-v4-pro", "reasoning", 0.000000435, 0.00000087), + _ModelSpec("deepseek/deepseek-v4-flash", "reasoning", 0.000000112, 0.000000224), + _ModelSpec("deepseek/deepseek-v3.2", "reasoning", 0.000000252, 0.000000378), + _ModelSpec("qwen/qwen3.6-max-preview", "reasoning", 0.00000104, 0.00000624), + _ModelSpec("qwen/qwen3.6-plus", "reasoning", 0.000000325, 0.00000195, max_images=10, max_videos=4), + _ModelSpec("qwen/qwen3.6-flash", "reasoning", 0.0000001875, 0.000001125, max_images=10, max_videos=4), + _ModelSpec("mistralai/mistral-large-2512", "standard", 0.0000005, 0.0000015, max_images=8), + _ModelSpec("mistralai/mistral-medium-3-5", "reasoning", 0.0000015, 0.0000075, max_images=8), + _ModelSpec("z-ai/glm-4.6", "reasoning", 0.00000043, 0.00000174), + _ModelSpec("z-ai/glm-5", "reasoning", 0.0000006, 0.00000192), + _ModelSpec("moonshotai/kimi-k2.6", "reasoning", 0.00000073, 0.00000349, max_images=10), + _ModelSpec("moonshotai/kimi-k2-thinking", "reasoning", 0.0000006, 0.0000025), + _ModelSpec("perplexity/sonar-pro", "perplexity", 0.000003, 0.000015), + _ModelSpec("perplexity/sonar-reasoning-pro", "perplexity_reasoning", 0.000002, 0.000008), + _ModelSpec("perplexity/sonar-deep-research", "perplexity_reasoning", 0.000002, 0.000008), +] + +_MODELS_BY_SLUG: dict[str, _ModelSpec] = {m.slug: m for m in MODELS} +_REASONING_EFFORTS = ["off", "low", "medium", "high"] +_SEARCH_CONTEXT_SIZES = ["low", "medium", "high"] + + +def _reasoning_extra_inputs() -> list: + return [ + IO.Combo.Input( + "reasoning_effort", + options=_REASONING_EFFORTS, + default="off", + tooltip="Reasoning effort. 'off' disables reasoning entirely.", + advanced=True, + ), + ] + + +def _perplexity_extra_inputs() -> list: + return [ + IO.Combo.Input( + "search_context_size", + options=_SEARCH_CONTEXT_SIZES, + default="medium", + tooltip="How much web search context to retrieve. Larger = more grounded but slower/pricier.", + advanced=True, + ), + ] + + +def _profile_inputs(profile: Profile) -> list: + if profile == "standard": + return [] + if profile in ("reasoning", "frontier_reasoning"): + return _reasoning_extra_inputs() + if profile == "perplexity": + return _perplexity_extra_inputs() + if profile == "perplexity_reasoning": + return _perplexity_extra_inputs() + _reasoning_extra_inputs() + raise ValueError(f"Unknown profile: {profile}") + + +def _media_inputs(spec: _ModelSpec) -> list: + extras: list = [] + if spec.max_images > 0: + extras.append( + IO.Autogrow.Input( + "images", + template=IO.Autogrow.TemplateNames( + IO.Image.Input("image"), + names=[f"image_{i}" for i in range(1, spec.max_images + 1)], + min=0, + ), + tooltip=f"Optional reference image(s) — up to {spec.max_images}. Sent as URLs.", + ) + ) + if spec.max_videos > 0: + extras.append( + IO.Autogrow.Input( + "videos", + template=IO.Autogrow.TemplateNames( + IO.Video.Input("video"), + names=[f"video_{i}" for i in range(1, spec.max_videos + 1)], + min=0, + ), + tooltip=f"Optional reference video(s) — up to {spec.max_videos}. Sent as URLs.", + ) + ) + return extras + + +def _inputs_for_model(spec: _ModelSpec) -> list: + return _profile_inputs(spec.profile) + _media_inputs(spec) + + +def _build_model_options() -> list[IO.DynamicCombo.Option]: + return [IO.DynamicCombo.Option(spec.slug, _inputs_for_model(spec)) for spec in MODELS] + + +def _calculate_price(response: OpenRouterChatResponse) -> float | None: + if response.usage and response.usage.cost is not None: + return float(response.usage.cost) + return None + + +def _price_badge_jsonata() -> str: + rates_pairs = [] + for spec in MODELS: + prompt_per_1k = spec.price_in * 1000 + completion_per_1k = spec.price_out * 1000 + rates_pairs.append(f' "{spec.slug}": [{prompt_per_1k:.8g}, {completion_per_1k:.8g}]') + rates_block = ",\n".join(rates_pairs) + return ( + "(\n" + " $rates := {\n" + f"{rates_block}\n" + " };\n" + " $r := $lookup($rates, widgets.model);\n" + " $r ? {\n" + ' "type": "list_usd",\n' + ' "usd": $r,\n' + ' "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" }\n' + ' } : {"type": "text", "text": "Token-based"}\n' + ")" + ) + + +async def _build_image_blocks( + cls: type[IO.ComfyNode], spec: _ModelSpec, images: list[Input.Image] +) -> list[OpenRouterImageContent]: + urls = await upload_images_to_comfyapi( + cls, + images, + max_images=spec.max_images, + total_pixels=2048 * 2048, + mime_type="image/png", + wait_label="Uploading reference images", + ) + return [OpenRouterImageContent(image_url=OpenRouterImageUrl(url=url)) for url in urls] + + +async def _build_video_blocks(cls: type[IO.ComfyNode], videos: list[Input.Video]) -> list[OpenRouterVideoContent]: + blocks: list[OpenRouterVideoContent] = [] + total = len(videos) + for idx, video in enumerate(videos): + label = "Uploading reference video" + if total > 1: + label = f"{label} ({idx + 1}/{total})" + url = await upload_video_to_comfyapi(cls, video, wait_label=label) + blocks.append(OpenRouterVideoContent(video_url=OpenRouterVideoUrl(url=url))) + return blocks + + +def _user_message(prompt: str, media_blocks: list[OpenRouterContentBlock]) -> OpenRouterMessage: + if not media_blocks: + return OpenRouterMessage(role="user", content=prompt) + blocks: list[OpenRouterContentBlock] = list(media_blocks) + blocks.append(OpenRouterTextContent(text=prompt)) + return OpenRouterMessage(role="user", content=blocks) + + +def _build_messages( + system_prompt: str, prompt: str, media_blocks: list[OpenRouterContentBlock] +) -> list[OpenRouterMessage]: + messages: list[OpenRouterMessage] = [] + if system_prompt: + messages.append(OpenRouterMessage(role="system", content=system_prompt)) + messages.append(_user_message(prompt, media_blocks)) + return messages + + +def _build_request( + slug: str, + system_prompt: str, + prompt: str, + media_blocks: list[OpenRouterContentBlock], + *, + seed: int, + reasoning_effort: str | None, + search_context_size: str | None, +) -> OpenRouterChatRequest: + reasoning_cfg: OpenRouterReasoningConfig | None = None + if reasoning_effort and reasoning_effort != "off": + # exclude=True asks providers to reason internally but not return the trace + reasoning_cfg = OpenRouterReasoningConfig(effort=reasoning_effort, exclude=True) + web_search_cfg: OpenRouterWebSearchOptions | None = None + if search_context_size: + web_search_cfg = OpenRouterWebSearchOptions(search_context_size=search_context_size) + return OpenRouterChatRequest( + model=slug, + messages=_build_messages(system_prompt, prompt, media_blocks), + seed=seed if seed > 0 else None, + reasoning=reasoning_cfg, + web_search_options=web_search_cfg, + ) + + +def _extract_text(response: OpenRouterChatResponse) -> str: + if response.error: + code = response.error.code if response.error.code is not None else "unknown" + raise ValueError(f"OpenRouter error ({code}): {response.error.message or 'no message'}") + if not response.choices: + raise ValueError("Empty response from OpenRouter (no choices).") + message = response.choices[0].message + if not message: + raise ValueError("Empty response from OpenRouter (no message).") + if message.refusal: + raise ValueError(f"Model refused to respond: {message.refusal}") + return message.content or "" + + +class OpenRouterLLMNode(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="OpenRouterLLMNode", + display_name="OpenRouter LLM", + category="api node/text/OpenRouter", + essentials_category="Text Generation", + description=( + "Generate text responses through OpenRouter. Routes to a curated set of popular " + "models from xAI, DeepSeek, Qwen, Mistral, Z.AI (GLM), Moonshot (Kimi), and " + "Perplexity Sonar." + ), + inputs=[ + IO.String.Input( + "prompt", + multiline=True, + default="", + tooltip="Text input to the model.", + ), + IO.DynamicCombo.Input( + "model", + options=_build_model_options(), + tooltip="The OpenRouter model used to generate the response.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + control_after_generate=True, + tooltip="Seed for sampling. Set to 0 to omit. Most models treat this as a hint only.", + ), + IO.String.Input( + "system_prompt", + multiline=True, + default="", + optional=True, + advanced=True, + tooltip="Foundational instructions that dictate the model's behavior.", + ), + ], + outputs=[IO.String.Output()], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model"]), + expr=_price_badge_jsonata(), + ), + ) + + @classmethod + async def execute( + cls, + prompt: str, + model: dict, + seed: int, + system_prompt: str = "", + ) -> IO.NodeOutput: + validate_string(prompt, strip_whitespace=True, min_length=1) + slug: str = model["model"] + spec = _MODELS_BY_SLUG.get(slug) + if spec is None: + raise ValueError(f"Unknown OpenRouter model: {slug}") + + reasoning_effort: str | None = model.get("reasoning_effort") + search_context_size: str | None = model.get("search_context_size") + + image_tensors: list[Input.Image] = [t for t in (model.get("images") or {}).values() if t is not None] + if image_tensors and sum(get_number_of_images(t) for t in image_tensors) > spec.max_images: + raise ValueError(f"Up to {spec.max_images} images are supported for {slug}.") + video_inputs: list[Input.Video] = [v for v in (model.get("videos") or {}).values() if v is not None] + if video_inputs and len(video_inputs) > spec.max_videos: + raise ValueError(f"Up to {spec.max_videos} videos are supported for {slug}.") + + media_blocks: list[OpenRouterContentBlock] = [] + if image_tensors: + media_blocks.extend(await _build_image_blocks(cls, spec, image_tensors)) + if video_inputs: + media_blocks.extend(await _build_video_blocks(cls, video_inputs)) + + request = _build_request( + slug, + system_prompt, + prompt, + media_blocks, + seed=seed, + reasoning_effort=reasoning_effort, + search_context_size=search_context_size, + ) + + response = await sync_op( + cls, + ApiEndpoint(path=OPENROUTER_CHAT_ENDPOINT, method="POST"), + response_model=OpenRouterChatResponse, + data=request, + price_extractor=_calculate_price, + ) + return IO.NodeOutput(_extract_text(response)) + + +class OpenRouterExtension(ComfyExtension): + @override + async def get_node_list(self) -> list[type[IO.ComfyNode]]: + return [OpenRouterLLMNode] + + +async def comfy_entrypoint() -> OpenRouterExtension: + return OpenRouterExtension() From 2ca1480f9198b04aba5fb03d7584e2fb1a30065f Mon Sep 17 00:00:00 2001 From: "Daxiong (Lin)" Date: Fri, 22 May 2026 02:48:20 +0800 Subject: [PATCH 7/8] chore: update workflow templates to v0.9.82 (#14034) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index d2986eda8..e20b6e044 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ comfyui-frontend-package==1.43.18 -comfyui-workflow-templates==0.9.79 +comfyui-workflow-templates==0.9.82 comfyui-embedded-docs==0.5.0 torch torchsde From b293f8cefd18b2f8be061e33cb985149ec2ee872 Mon Sep 17 00:00:00 2001 From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com> Date: Thu, 21 May 2026 21:58:03 +0300 Subject: [PATCH 8/8] [Partner Nodes] add widget for automatic upscaling for the ByteDance2Reference node (#14032) Signed-off-by: bigcat88 --- comfy_api_nodes/nodes_bytedance.py | 33 ++++++++++++++++++------ comfy_api_nodes/util/__init__.py | 6 +++-- comfy_api_nodes/util/conversions.py | 40 ++++++++++++++++++++++++++--- 3 files changed, 66 insertions(+), 13 deletions(-) diff --git a/comfy_api_nodes/nodes_bytedance.py b/comfy_api_nodes/nodes_bytedance.py index d6b479336..e08fc0b01 100644 --- a/comfy_api_nodes/nodes_bytedance.py +++ b/comfy_api_nodes/nodes_bytedance.py @@ -43,15 +43,16 @@ from comfy_api_nodes.util import ( ApiEndpoint, download_url_to_image_tensor, download_url_to_video_output, + downscale_video_to_max_pixels, get_number_of_images, image_tensor_pair_to_batch, poll_op, - resize_video_to_pixel_budget, sync_op, upload_audio_to_comfyapi, upload_image_to_comfyapi, upload_images_to_comfyapi, upload_video_to_comfyapi, + upscale_video_to_min_pixels, validate_image_aspect_ratio, validate_image_dimensions, validate_string, @@ -110,12 +111,13 @@ def _validate_ref_video_pixels(video: Input.Video, model_id: str, resolution: st max_px = limits.get("max") if min_px and pixels < min_px: raise ValueError( - f"Reference video {index} is too small: {w}x{h} = {pixels:,}px. " f"Minimum is {min_px:,}px for this model." + f"Reference video {index} is too small: {w}x{h} = {pixels:,} total pixels. " + f"Minimum for this model is {min_px:,} total pixels." ) if max_px and pixels > max_px: raise ValueError( - f"Reference video {index} is too large: {w}x{h} = {pixels:,}px. " - f"Maximum is {max_px:,}px for this model. Try downscaling the video." + f"Reference video {index} is too large: {w}x{h} = {pixels:,} total pixels. " + f"Maximum for this model is {max_px:,} total pixels. Try downscaling the video." ) @@ -1676,14 +1678,14 @@ class ByteDance2FirstLastFrameNode(IO.ComfyNode): "first_frame_asset_id", default="", tooltip="Seedance asset_id to use as the first frame. " - "Mutually exclusive with the first_frame image input.", + "Mutually exclusive with the first_frame image input.", optional=True, ), IO.String.Input( "last_frame_asset_id", default="", tooltip="Seedance asset_id to use as the last frame. " - "Mutually exclusive with the last_frame image input.", + "Mutually exclusive with the last_frame image input.", optional=True, ), IO.Int.Input( @@ -1865,11 +1867,20 @@ def _seedance2_reference_inputs(resolutions: list[str], default_ratio: str = "16 IO.Boolean.Input( "auto_downscale", default=False, - advanced=True, optional=True, tooltip="Automatically downscale reference videos that exceed the model's pixel budget " "for the selected resolution. Aspect ratio is preserved; videos already within limits are untouched.", ), + IO.Boolean.Input( + "auto_upscale", + default=False, + advanced=True, + optional=True, + tooltip="Automatically upscale reference videos that are below the model's minimum pixel count " + "for the selected resolution. Aspect ratio is preserved; videos already meeting the minimum are " + "untouched. Note: upscaling a low-resolution source does not add real detail and may produce " + "lower-quality generations.", + ), IO.Autogrow.Input( "reference_assets", template=IO.Autogrow.TemplateNames( @@ -2030,7 +2041,13 @@ class ByteDance2ReferenceNode(IO.ComfyNode): max_px = SEEDANCE2_REF_VIDEO_PIXEL_LIMITS.get(model_id, {}).get(model["resolution"], {}).get("max") if max_px: for key in reference_videos: - reference_videos[key] = resize_video_to_pixel_budget(reference_videos[key], max_px) + reference_videos[key] = downscale_video_to_max_pixels(reference_videos[key], max_px) + + if model.get("auto_upscale") and reference_videos: + min_px = SEEDANCE2_REF_VIDEO_PIXEL_LIMITS.get(model_id, {}).get(model["resolution"], {}).get("min") + if min_px: + for key in reference_videos: + reference_videos[key] = upscale_video_to_min_pixels(reference_videos[key], min_px) total_video_duration = 0.0 for i, key in enumerate(reference_videos, 1): diff --git a/comfy_api_nodes/util/__init__.py b/comfy_api_nodes/util/__init__.py index f3584aba9..25cb88869 100644 --- a/comfy_api_nodes/util/__init__.py +++ b/comfy_api_nodes/util/__init__.py @@ -16,16 +16,17 @@ from .conversions import ( convert_mask_to_image, downscale_image_tensor, downscale_image_tensor_by_max_side, + downscale_video_to_max_pixels, image_tensor_pair_to_batch, pil_to_bytesio, resize_mask_to_image, - resize_video_to_pixel_budget, tensor_to_base64_string, tensor_to_bytesio, tensor_to_pil, text_filepath_to_base64_string, text_filepath_to_data_uri, trim_video, + upscale_video_to_min_pixels, video_to_base64_string, ) from .download_helpers import ( @@ -88,16 +89,17 @@ __all__ = [ "convert_mask_to_image", "downscale_image_tensor", "downscale_image_tensor_by_max_side", + "downscale_video_to_max_pixels", "image_tensor_pair_to_batch", "pil_to_bytesio", "resize_mask_to_image", - "resize_video_to_pixel_budget", "tensor_to_base64_string", "tensor_to_bytesio", "tensor_to_pil", "text_filepath_to_base64_string", "text_filepath_to_data_uri", "trim_video", + "upscale_video_to_min_pixels", "video_to_base64_string", # Validation utilities "get_image_dimensions", diff --git a/comfy_api_nodes/util/conversions.py b/comfy_api_nodes/util/conversions.py index be5d5719b..5738df57f 100644 --- a/comfy_api_nodes/util/conversions.py +++ b/comfy_api_nodes/util/conversions.py @@ -415,14 +415,48 @@ def trim_video(video: Input.Video, duration_sec: float) -> Input.Video: raise RuntimeError(f"Failed to trim video: {str(e)}") from e -def resize_video_to_pixel_budget(video: Input.Video, total_pixels: int) -> Input.Video: - """Downscale a video to fit within ``total_pixels`` (w * h), preserving aspect ratio. +def downscale_video_to_max_pixels(video: Input.Video, max_pixels: int) -> Input.Video: + """Downscale a video to fit within ``max_pixels`` (w * h), preserving aspect ratio. Returns the original video object untouched when it already fits. Preserves frame rate, duration, and audio. Aspect ratio is preserved up to a fraction of a percent (even-dim rounding). """ src_w, src_h = video.get_dimensions() - scale_dims = _compute_downscale_dims(src_w, src_h, total_pixels) + scale_dims = _compute_downscale_dims(src_w, src_h, max_pixels) + if scale_dims is None: + return video + return _apply_video_scale(video, scale_dims) + + +def _compute_upscale_dims(src_w: int, src_h: int, total_pixels: int) -> tuple[int, int] | None: + """Return upscaled (w, h) with even dims meeting at least ``total_pixels``, or None if already large enough. + + Source aspect ratio is preserved; output may drift by a fraction of a percent because both dimensions + are rounded up to even values (many codecs require divisible-by-2). The result is guaranteed to be at + least ``total_pixels``. + """ + pixels = src_w * src_h + if pixels >= total_pixels: + return None + scale = math.sqrt(total_pixels / pixels) + new_w = math.ceil(src_w * scale) + new_h = math.ceil(src_h * scale) + if new_w % 2: + new_w += 1 + if new_h % 2: + new_h += 1 + return new_w, new_h + + +def upscale_video_to_min_pixels(video: Input.Video, min_pixels: int) -> Input.Video: + """Upscale a video to meet at least ``min_pixels`` (w * h), preserving aspect ratio. + + Returns the original video object untouched when it already meets the minimum. Preserves frame rate, + duration, and audio. Aspect ratio is preserved up to a fraction of a percent (even-dim rounding). + Note: upscaling a low-resolution source does not add real detail; downstream model quality may suffer. + """ + src_w, src_h = video.get_dimensions() + scale_dims = _compute_upscale_dims(src_w, src_h, min_pixels) if scale_dims is None: return video return _apply_video_scale(video, scale_dims)