mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-05-28 01:47:32 +08:00
Update categories and add new blueprints
This commit is contained in:
parent
1c8d4ca092
commit
577c56bb6b
@ -1553,7 +1553,7 @@
|
|||||||
"VHS_MetadataImage": true,
|
"VHS_MetadataImage": true,
|
||||||
"VHS_KeepIntermediate": true
|
"VHS_KeepIntermediate": true
|
||||||
},
|
},
|
||||||
"category": "Image generation and editing/Canny to image",
|
"category": "Image generation and editing/Conditioned",
|
||||||
"description": "Generates an image from a Canny edge map using Z-Image-Turbo, with text conditioning."
|
"description": "Generates an image from a Canny edge map using Z-Image-Turbo, with text conditioning."
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
@ -3600,7 +3600,7 @@
|
|||||||
"extra": {
|
"extra": {
|
||||||
"workflowRendererVersion": "LG"
|
"workflowRendererVersion": "LG"
|
||||||
},
|
},
|
||||||
"category": "Video generation and editing/Canny to video",
|
"category": "Video generation and editing/Conditioned",
|
||||||
"description": "Generates video from Canny edge maps using LTX-2, with optional synchronized audio."
|
"description": "Generates video from Canny edge maps using LTX-2, with optional synchronized audio."
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
@ -1401,7 +1401,7 @@
|
|||||||
"extra": {
|
"extra": {
|
||||||
"workflowRendererVersion": "LG"
|
"workflowRendererVersion": "LG"
|
||||||
},
|
},
|
||||||
"category": "Image generation and editing/ControlNet",
|
"category": "Image generation and editing/Conditioned",
|
||||||
"description": "Generates images from a text prompt and ControlNet conditioning (e.g. depth, canny) using Z-Image-Turbo."
|
"description": "Generates images from a text prompt and ControlNet conditioning (e.g. depth, canny) using Z-Image-Turbo."
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
@ -1579,7 +1579,7 @@
|
|||||||
"VHS_MetadataImage": true,
|
"VHS_MetadataImage": true,
|
||||||
"VHS_KeepIntermediate": true
|
"VHS_KeepIntermediate": true
|
||||||
},
|
},
|
||||||
"category": "Image generation and editing/Depth to image",
|
"category": "Image generation and editing/Conditioned",
|
||||||
"description": "Generates an image from a depth map using Z-Image-Turbo with text conditioning."
|
"description": "Generates an image from a depth map using Z-Image-Turbo with text conditioning."
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@ -4233,7 +4233,7 @@
|
|||||||
"extra": {
|
"extra": {
|
||||||
"workflowRendererVersion": "LG"
|
"workflowRendererVersion": "LG"
|
||||||
},
|
},
|
||||||
"category": "Video generation and editing/Depth to video",
|
"category": "Video generation and editing/Conditioned",
|
||||||
"description": "Generates depth-controlled video with LTX-2: motion and structure follow a depth-reference video alongside text prompting, optional first-frame image conditioning, with optional synchronized audio."
|
"description": "Generates depth-controlled video with LTX-2: motion and structure follow a depth-reference video alongside text prompting, optional first-frame image conditioning, with optional synchronized audio."
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -5194,8 +5194,7 @@
|
|||||||
"extra": {
|
"extra": {
|
||||||
"workflowRendererVersion": "LG"
|
"workflowRendererVersion": "LG"
|
||||||
},
|
},
|
||||||
"description": "Estimates a monocular depth map from an input image using the Lotus depth estimation model.",
|
"description": "Estimates a monocular depth map from an input image using the Lotus depth estimation model."
|
||||||
"category": "Video generation and editing/Depth to video"
|
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|||||||
@ -3350,7 +3350,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"extra": {},
|
"extra": {},
|
||||||
"category": "Video generation and editing/First-Last-Frame to Video",
|
"category": "Video generation and editing/Conditioned",
|
||||||
"description": "Generates a video interpolating between first and last keyframes using LTX-2.3."
|
"description": "Generates a video interpolating between first and last keyframes using LTX-2.3."
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
@ -3350,7 +3350,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"extra": {},
|
"extra": {},
|
||||||
"category": "Video generation and editing/First-Last-Frame to Video",
|
"category": "Video generation and editing/FLF2V",
|
||||||
"description": "Generates a video that interpolates between the first and last keyframes using LTX-2.3, including optional audio."
|
"description": "Generates a video that interpolates between the first and last keyframes using LTX-2.3, including optional audio."
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
@ -1256,6 +1256,8 @@
|
|||||||
"type": "BOOLEAN"
|
"type": "BOOLEAN"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
"category": "3D/Geometry Estimation",
|
||||||
|
"description": "Estimates 3D scene geometry from an input image using MoGe, outputting a mesh plus OpenGL and DirectX normal maps.",
|
||||||
"extra": {}
|
"extra": {}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
@ -310,7 +310,7 @@
|
|||||||
"extra": {
|
"extra": {
|
||||||
"workflowRendererVersion": "LG"
|
"workflowRendererVersion": "LG"
|
||||||
},
|
},
|
||||||
"category": "Text generation/Image Captioning",
|
"category": "Image Tools",
|
||||||
"description": "Generates descriptive captions for images using Google's Gemini multimodal LLM."
|
"description": "Generates descriptive captions for images using Google's Gemini multimodal LLM."
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
@ -15,7 +15,7 @@
|
|||||||
330
|
330
|
||||||
],
|
],
|
||||||
"flags": {},
|
"flags": {},
|
||||||
"order": 3,
|
"order": 0,
|
||||||
"mode": 0,
|
"mode": 0,
|
||||||
"inputs": [
|
"inputs": [
|
||||||
{
|
{
|
||||||
@ -80,6 +80,7 @@
|
|||||||
"links": []
|
"links": []
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
"title": "Image Depth Estimation (MoGe)",
|
||||||
"properties": {
|
"properties": {
|
||||||
"proxyWidgets": [
|
"proxyWidgets": [
|
||||||
[
|
[
|
||||||
@ -109,8 +110,7 @@
|
|||||||
"secondTabOffset": 80,
|
"secondTabOffset": 80,
|
||||||
"secondTabWidth": 65
|
"secondTabWidth": 65
|
||||||
},
|
},
|
||||||
"widgets_values": [],
|
"widgets_values": []
|
||||||
"title": "Depth Estimation (MoGe)"
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"links": [],
|
"links": [],
|
||||||
@ -128,7 +128,8 @@
|
|||||||
},
|
},
|
||||||
"revision": 0,
|
"revision": 0,
|
||||||
"config": {},
|
"config": {},
|
||||||
"name": "Depth Estimation (MoGe)",
|
"name": "Image Depth Estimation (MoGe)",
|
||||||
|
"description": "Estimates monocular depth from an input image using MoGe, outputting both raw and colorized depth maps plus a mask.",
|
||||||
"inputNode": {
|
"inputNode": {
|
||||||
"id": -10,
|
"id": -10,
|
||||||
"bounding": [
|
"bounding": [
|
||||||
@ -15,7 +15,7 @@
|
|||||||
400
|
400
|
||||||
],
|
],
|
||||||
"flags": {},
|
"flags": {},
|
||||||
"order": 3,
|
"order": 0,
|
||||||
"mode": 0,
|
"mode": 0,
|
||||||
"inputs": [
|
"inputs": [
|
||||||
{
|
{
|
||||||
@ -119,6 +119,7 @@
|
|||||||
"links": []
|
"links": []
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
"title": "Image Face Detection (Mediapipe)",
|
||||||
"properties": {
|
"properties": {
|
||||||
"proxyWidgets": [
|
"proxyWidgets": [
|
||||||
[
|
[
|
||||||
@ -164,8 +165,7 @@
|
|||||||
"secondTabOffset": 80,
|
"secondTabOffset": 80,
|
||||||
"secondTabWidth": 65
|
"secondTabWidth": 65
|
||||||
},
|
},
|
||||||
"widgets_values": [],
|
"widgets_values": []
|
||||||
"title": "Face Detection (Mediapipe)"
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"links": [],
|
"links": [],
|
||||||
@ -183,7 +183,8 @@
|
|||||||
},
|
},
|
||||||
"revision": 0,
|
"revision": 0,
|
||||||
"config": {},
|
"config": {},
|
||||||
"name": "Face Detection (Mediapipe)",
|
"name": "Image Face Detection (Mediapipe)",
|
||||||
|
"description": "Detects facial landmarks from an image using MediaPipe, outputting landmark data, face bounding boxes, and an optional face-region mask.",
|
||||||
"inputNode": {
|
"inputNode": {
|
||||||
"id": -10,
|
"id": -10,
|
||||||
"bounding": [
|
"bounding": [
|
||||||
@ -769,7 +770,8 @@
|
|||||||
"type": "COMBO"
|
"type": "COMBO"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"extra": {}
|
"extra": {},
|
||||||
|
"category": "Conditioning & Preprocessors/Face Detection"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -703,7 +703,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"extra": {},
|
"extra": {},
|
||||||
"category": "Image Tools/Image Segmentation",
|
"category": "Conditioning & Preprocessors/Segmentation & Mask",
|
||||||
"description": "Segments images into masks using Meta SAM3 from text prompts, points, or boxes."
|
"description": "Segments images into masks using Meta SAM3 from text prompts, points, or boxes."
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
@ -948,7 +948,7 @@
|
|||||||
"extra": {
|
"extra": {
|
||||||
"workflowRendererVersion": "LG"
|
"workflowRendererVersion": "LG"
|
||||||
},
|
},
|
||||||
"category": "Image Tools/Preprocessors",
|
"category": "Conditioning & Preprocessors/Depth",
|
||||||
"description": "Estimates a monocular depth map from an input image using the Lotus depth estimation model."
|
"description": "Estimates a monocular depth map from an input image using the Lotus depth estimation model."
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
@ -1197,7 +1197,7 @@
|
|||||||
"extra": {
|
"extra": {
|
||||||
"workflowRendererVersion": "LG"
|
"workflowRendererVersion": "LG"
|
||||||
},
|
},
|
||||||
"category": "Image Tools/Preprocessors",
|
"category": "Conditioning & Preprocessors/Pose",
|
||||||
"description": "Detects multiple people in an image and outputs per-person pose keypoints, skeleton renders, and bounding boxes using SDPose."
|
"description": "Detects multiple people in an image and outputs per-person pose keypoints, skeleton renders, and bounding boxes using SDPose."
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
@ -877,7 +877,7 @@
|
|||||||
"extra": {
|
"extra": {
|
||||||
"workflowRendererVersion": "LG"
|
"workflowRendererVersion": "LG"
|
||||||
},
|
},
|
||||||
"category": "Image Tools/Preprocessors",
|
"category": "Conditioning & Preprocessors/Pose",
|
||||||
"description": "Extracts human pose keypoints and stick-figure visuals from an image using SDPose-OOD, with optional bounding-box input per subject."
|
"description": "Extracts human pose keypoints and stick-figure visuals from an image using SDPose-OOD, with optional bounding-box input per subject."
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
@ -1298,7 +1298,7 @@
|
|||||||
"VHS_MetadataImage": true,
|
"VHS_MetadataImage": true,
|
||||||
"VHS_KeepIntermediate": true
|
"VHS_KeepIntermediate": true
|
||||||
},
|
},
|
||||||
"category": "Image generation and editing/Pose to image",
|
"category": "Image generation and editing/Conditioned",
|
||||||
"description": "Generates an image from pose keypoints using Z-Image-Turbo with text conditioning."
|
"description": "Generates an image from pose keypoints using Z-Image-Turbo with text conditioning."
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
@ -3870,7 +3870,7 @@
|
|||||||
"extra": {
|
"extra": {
|
||||||
"workflowRendererVersion": "LG"
|
"workflowRendererVersion": "LG"
|
||||||
},
|
},
|
||||||
"category": "Video generation and editing/Pose to video",
|
"category": "Video generation and editing/Conditioned",
|
||||||
"description": "Generates video from pose reference frames using LTX-2, with optional synchronized audio."
|
"description": "Generates video from pose reference frames using LTX-2, with optional synchronized audio."
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
@ -270,7 +270,7 @@
|
|||||||
"extra": {
|
"extra": {
|
||||||
"workflowRendererVersion": "LG"
|
"workflowRendererVersion": "LG"
|
||||||
},
|
},
|
||||||
"category": "Text generation/Prompt enhance",
|
"category": "Text Tools",
|
||||||
"description": "Expands short text prompts into detailed descriptions using a text generation model for better generation quality."
|
"description": "Expands short text prompts into detailed descriptions using a text generation model for better generation quality."
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
@ -307,7 +307,7 @@
|
|||||||
"extra": {
|
"extra": {
|
||||||
"workflowRendererVersion": "LG"
|
"workflowRendererVersion": "LG"
|
||||||
},
|
},
|
||||||
"category": "Text generation/Video Captioning",
|
"category": "Video Tools",
|
||||||
"description": "Generates descriptive captions for video input using Google's Gemini multimodal LLM."
|
"description": "Generates descriptive captions for video input using Google's Gemini multimodal LLM."
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
1225
blueprints/Video Depth Estimation (MoGe).json
Normal file
1225
blueprints/Video Depth Estimation (MoGe).json
Normal file
File diff suppressed because it is too large
Load Diff
1109
blueprints/Video Face Detection (Mediapipe).json
Normal file
1109
blueprints/Video Face Detection (Mediapipe).json
Normal file
File diff suppressed because it is too large
Load Diff
@ -3768,7 +3768,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"extra": {},
|
"extra": {},
|
||||||
"category": "Video Tools",
|
"category": "Video generation and editing/Inpaint video",
|
||||||
"description": "Removes objects from video by inpainting masked regions using VOID (CogVideoX), with SAM3 text-guided segmentation and optional two-pass optical-flow refinement."
|
"description": "Removes objects from video by inpainting masked regions using VOID (CogVideoX), with SAM3 text-guided segmentation and optional two-pass optical-flow refinement."
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@ -818,7 +818,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"extra": {},
|
"extra": {},
|
||||||
"category": "Video Tools",
|
"category": "Conditioning & Preprocessors/Segmentation & Mask",
|
||||||
"description": "Segments video into temporally consistent masks using Meta SAM3 from text or interactive prompts."
|
"description": "Segments video into temporally consistent masks using Meta SAM3 from text or interactive prompts."
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
@ -1314,7 +1314,7 @@
|
|||||||
"extra": {
|
"extra": {
|
||||||
"workflowRendererVersion": "LG"
|
"workflowRendererVersion": "LG"
|
||||||
},
|
},
|
||||||
"category": "Video Tools",
|
"category": "Conditioning & Preprocessors/Pose",
|
||||||
"description": "Extracts multi-person pose keypoints and skeleton frame sequences from video using SDPose with built-in person detection."
|
"description": "Extracts multi-person pose keypoints and skeleton frame sequences from video using SDPose with built-in person detection."
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user