Merge branch 'master' into matt/be-1452-core-jobs-namespace-cancel
Some checks are pending
Python Linting / Run Ruff (push) Waiting to run
Python Linting / Run Pylint (push) Waiting to run

This commit is contained in:
Matt Miller 2026-06-17 12:30:46 -07:00 committed by GitHub
commit 6cd9f0ddfb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
79 changed files with 23616 additions and 358 deletions

View File

@ -140,7 +140,7 @@ ComfyUI follows a weekly release cycle targeting Monday but this regularly chang
- Commits outside of the stable release tags may be very unstable and break many custom nodes. - Commits outside of the stable release tags may be very unstable and break many custom nodes.
- Serves as the foundation for the desktop release - Serves as the foundation for the desktop release
2. **[ComfyUI Desktop](https://github.com/Comfy-Org/desktop)** 2. **[ComfyUI Desktop](https://github.com/Comfy-Org/Comfy-Desktop)**
- Builds a new release using the latest stable core version - Builds a new release using the latest stable core version
3. **[ComfyUI Frontend](https://github.com/Comfy-Org/ComfyUI_frontend)** 3. **[ComfyUI Frontend](https://github.com/Comfy-Org/ComfyUI_frontend)**
@ -309,7 +309,7 @@ After this you should have everything installed and can proceed to running Comfy
#### Apple Mac silicon #### Apple Mac silicon
You can install ComfyUI in Apple Mac silicon (M1 or M2) with any recent macOS version. You can install ComfyUI in Apple Mac silicon (M1, M2, M3 or M4) with any recent macOS version.
1. Install pytorch nightly. For instructions, read the [Accelerated PyTorch training on Mac](https://developer.apple.com/metal/pytorch/) Apple Developer guide (make sure to install the latest pytorch nightly). 1. Install pytorch nightly. For instructions, read the [Accelerated PyTorch training on Mac](https://developer.apple.com/metal/pytorch/) Apple Developer guide (make sure to install the latest pytorch nightly).
1. Follow the [ComfyUI manual installation](#manual-install-windows-linux) instructions for Windows and Linux. 1. Follow the [ComfyUI manual installation](#manual-install-windows-linux) instructions for Windows and Linux.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,569 @@
{
"revision": 0,
"last_node_id": 89,
"last_link_id": 0,
"nodes": [
{
"id": 89,
"type": "85e595bd-af9e-40ee-85c5-b98bb15da47a",
"pos": [
320,
520
],
"size": [
400,
360
],
"flags": {},
"order": 3,
"mode": 0,
"inputs": [
{
"localized_name": "image",
"name": "image",
"type": "IMAGE",
"link": null
},
{
"name": "resolution",
"type": "INT",
"widget": {
"name": "resolution"
},
"link": null
},
{
"name": "resize_method",
"type": "COMBO",
"widget": {
"name": "resize_method"
},
"link": null
},
{
"label": "output_type",
"name": "output",
"type": "COMFY_DYNAMICCOMBO_V3",
"widget": {
"name": "output"
},
"link": null
},
{
"label": "output_normalization",
"name": "output.normalization",
"type": "COMBO",
"widget": {
"name": "output.normalization"
},
"link": null
},
{
"label": "apply_sky_clip",
"name": "output.apply_sky_clip",
"type": "BOOLEAN",
"widget": {
"name": "output.apply_sky_clip"
},
"link": null
},
{
"name": "model_name",
"type": "COMBO",
"widget": {
"name": "model_name"
},
"link": null
}
],
"outputs": [
{
"localized_name": "IMAGE",
"name": "IMAGE",
"type": "IMAGE",
"links": []
}
],
"properties": {
"proxyWidgets": [
[
"87",
"resolution"
],
[
"87",
"resize_method"
],
[
"86",
"output"
],
[
"86",
"output.normalization"
],
[
"86",
"output.apply_sky_clip"
],
[
"88",
"model_name"
]
],
"cnr_id": "comfy-core",
"ver": "0.24.0"
},
"widgets_values": [],
"title": "Image Depth Estimation (Depth Anything 3)"
}
],
"links": [],
"version": 0.4,
"definitions": {
"subgraphs": [
{
"id": "85e595bd-af9e-40ee-85c5-b98bb15da47a",
"version": 1,
"state": {
"lastGroupId": 4,
"lastNodeId": 89,
"lastLinkId": 109,
"lastRerouteId": 0
},
"revision": 2,
"config": {},
"name": "Image Depth Estimation (Depth Anything 3)",
"inputNode": {
"id": -10,
"bounding": [
400,
90,
166.998046875,
188
]
},
"outputNode": {
"id": -20,
"bounding": [
1250,
146,
128,
68
]
},
"inputs": [
{
"id": "43cf3118-495a-487d-8eb3-a17c7e92f64f",
"name": "image",
"type": "IMAGE",
"linkIds": [
19
],
"localized_name": "image",
"pos": [
542.998046875,
114
]
},
{
"id": "1089a0a1-6db1-45a8-84b0-0bfdc2ed920a",
"name": "resolution",
"type": "INT",
"linkIds": [
22
],
"pos": [
542.998046875,
134
]
},
{
"id": "25fb64ac-26d5-466d-995b-6d51b9afa2c4",
"name": "resize_method",
"type": "COMBO",
"linkIds": [
23
],
"pos": [
542.998046875,
154
]
},
{
"id": "8acafb7c-6c8b-46b3-9d74-c563498a3af1",
"name": "output",
"type": "COMFY_DYNAMICCOMBO_V3",
"linkIds": [
24
],
"label": "output_type",
"pos": [
542.998046875,
174
]
},
{
"id": "1da5009b-4648-43e8-a257-16426630cf22",
"name": "output.normalization",
"type": "COMBO",
"linkIds": [
25
],
"label": "output_normalization",
"pos": [
542.998046875,
194
]
},
{
"id": "fd7edb33-5fb1-4538-a411-26e5039a9321",
"name": "output.apply_sky_clip",
"type": "BOOLEAN",
"linkIds": [
26
],
"label": "apply_sky_clip",
"pos": [
542.998046875,
214
]
},
{
"id": "b5be4c8a-b833-4f1e-8c94-3ed1dd722190",
"name": "model_name",
"type": "COMBO",
"linkIds": [
106
],
"pos": [
542.998046875,
234
]
}
],
"outputs": [
{
"id": "478ab537-63bc-4d74-a9f0-c975f550880f",
"name": "IMAGE",
"type": "IMAGE",
"linkIds": [
7
],
"localized_name": "IMAGE",
"pos": [
1274,
170
]
}
],
"widgets": [],
"nodes": [
{
"id": 86,
"type": "DA3Render",
"pos": [
800,
310
],
"size": [
380,
130
],
"flags": {},
"order": 0,
"mode": 0,
"inputs": [
{
"localized_name": "da3_geometry",
"name": "da3_geometry",
"type": "DA3_GEOMETRY",
"link": 12
},
{
"localized_name": "output",
"name": "output",
"type": "COMFY_DYNAMICCOMBO_V3",
"widget": {
"name": "output"
},
"link": 24
},
{
"localized_name": "output.normalization",
"name": "output.normalization",
"type": "COMBO",
"widget": {
"name": "output.normalization"
},
"link": 25
},
{
"localized_name": "output.apply_sky_clip",
"name": "output.apply_sky_clip",
"type": "BOOLEAN",
"widget": {
"name": "output.apply_sky_clip"
},
"link": 26
},
{
"name": "geometry",
"type": "DA3_GEOMETRY",
"link": null
}
],
"outputs": [
{
"localized_name": "IMAGE",
"name": "IMAGE",
"type": "IMAGE",
"slot_index": 0,
"links": [
7
]
}
],
"properties": {
"Node name for S&R": "DA3Render",
"cnr_id": "comfy-core",
"ver": "0.19.0"
},
"widgets_values": [
"depth",
"v2_style",
false
]
},
{
"id": 87,
"type": "DA3Inference",
"pos": [
800,
50
],
"size": [
390,
130
],
"flags": {},
"order": 1,
"mode": 0,
"inputs": [
{
"localized_name": "da3_model",
"name": "da3_model",
"type": "DA3_MODEL",
"link": 107
},
{
"localized_name": "image",
"name": "image",
"type": "IMAGE",
"link": 19
},
{
"localized_name": "resolution",
"name": "resolution",
"type": "INT",
"widget": {
"name": "resolution"
},
"link": 22
},
{
"localized_name": "resize_method",
"name": "resize_method",
"type": "COMBO",
"widget": {
"name": "resize_method"
},
"link": 23
},
{
"localized_name": "mode",
"name": "mode",
"type": "COMFY_DYNAMICCOMBO_V3",
"widget": {
"name": "mode"
},
"link": null
}
],
"outputs": [
{
"localized_name": "da3_geometry",
"name": "da3_geometry",
"type": "DA3_GEOMETRY",
"slot_index": 0,
"links": [
12
]
}
],
"properties": {
"Node name for S&R": "DA3Inference",
"cnr_id": "comfy-core",
"ver": "0.19.0"
},
"widgets_values": [
504,
"upper_bound_resize",
"mono"
]
},
{
"id": 88,
"type": "LoadDA3Model",
"pos": [
810,
-160
],
"size": [
400,
140
],
"flags": {},
"order": 2,
"mode": 0,
"inputs": [
{
"localized_name": "model_name",
"name": "model_name",
"type": "COMBO",
"widget": {
"name": "model_name"
},
"link": 106
},
{
"localized_name": "weight_dtype",
"name": "weight_dtype",
"type": "COMBO",
"widget": {
"name": "weight_dtype"
},
"link": null
}
],
"outputs": [
{
"localized_name": "DA3_MODEL",
"name": "DA3_MODEL",
"type": "DA3_MODEL",
"links": [
107
]
}
],
"properties": {
"Node name for S&R": "LoadDA3Model",
"cnr_id": "comfy-core",
"ver": "0.24.0",
"models": [
{
"name": "depth_anything_3_mono_large.safetensors",
"url": "https://huggingface.co/Comfy-Org/Depth-Anything-3/resolve/main/geometry_estimation/depth_anything_3_mono_large.safetensors",
"directory": "geometry_estimation"
}
]
},
"widgets_values": [
"depth_anything_3_mono_large.safetensors",
"default"
]
}
],
"groups": [],
"links": [
{
"id": 12,
"origin_id": 87,
"origin_slot": 0,
"target_id": 86,
"target_slot": 0,
"type": "DA3_GEOMETRY"
},
{
"id": 19,
"origin_id": -10,
"origin_slot": 0,
"target_id": 87,
"target_slot": 1,
"type": "IMAGE"
},
{
"id": 7,
"origin_id": 86,
"origin_slot": 0,
"target_id": -20,
"target_slot": 0,
"type": "IMAGE"
},
{
"id": 22,
"origin_id": -10,
"origin_slot": 1,
"target_id": 87,
"target_slot": 2,
"type": "INT"
},
{
"id": 23,
"origin_id": -10,
"origin_slot": 2,
"target_id": 87,
"target_slot": 3,
"type": "COMBO"
},
{
"id": 24,
"origin_id": -10,
"origin_slot": 3,
"target_id": 86,
"target_slot": 1,
"type": "COMFY_DYNAMICCOMBO_V3"
},
{
"id": 25,
"origin_id": -10,
"origin_slot": 4,
"target_id": 86,
"target_slot": 2,
"type": "COMBO"
},
{
"id": 26,
"origin_id": -10,
"origin_slot": 5,
"target_id": 86,
"target_slot": 3,
"type": "BOOLEAN"
},
{
"id": 106,
"origin_id": -10,
"origin_slot": 6,
"target_id": 88,
"target_slot": 0,
"type": "COMBO"
},
{
"id": 107,
"origin_id": 88,
"origin_slot": 0,
"target_id": 87,
"target_slot": 0,
"type": "DA3_MODEL"
}
],
"extra": {},
"category": "Conditioning & Preprocessors/Depth",
"description": "This subgraph takes an input image and produces a depth map using the Depth Anything 3 model, which recovers spatially consistent geometry from any number of views. It is ideal for single or multi-view images, videos, and 3D scenes where accurate depth estimation is needed for tasks like SLAM, novel view synthesis, or spatial perception. The model uses a plain transformer backbone and supports both monocular and multi-view inputs without requiring known camera poses."
}
]
},
"extra": {
"BlueprintDescription": "This subgraph takes an input image and produces a depth map using the Depth Anything 3 model, which recovers spatially consistent geometry from any number of views. It is ideal for single or multi-view images, videos, and 3D scenes where accurate depth estimation is needed for tasks like SLAM, novel view synthesis, or spatial perception. The model uses a plain transformer backbone and supports both monocular and multi-view inputs without requiring known camera poses."
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1077,9 +1077,12 @@
} }
], ],
"extra": {}, "extra": {},
"category": "Image generation and editing/Text to image" "category": "Image generation and editing/Text to image",
"description": "This subgraph converts text prompts into non-photorealistic illustrations using a 2-billion-parameter model optimized for anime and artistic styles. It is ideal for generating concept art, character designs, or stylized illustrations where photorealism is not required. The model excels with anime and artistic content but performs poorly on realistic subjects."
} }
] ]
}, },
"extra": {} "extra": {
"BlueprintDescription": "This subgraph converts text prompts into non-photorealistic illustrations using a 2-billion-parameter model optimized for anime and artistic styles. It is ideal for generating concept art, character designs, or stylized illustrations where photorealism is not required. The model excels with anime and artistic content but performs poorly on realistic subjects."
}
} }

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,825 @@
{
"revision": 0,
"last_node_id": 97,
"last_link_id": 0,
"nodes": [
{
"id": 97,
"type": "253ec5ca-8333-4ddf-a036-9fc0923651b9",
"pos": [
410,
500
],
"size": [
400,
400
],
"flags": {},
"order": 3,
"mode": 0,
"inputs": [
{
"name": "video",
"type": "VIDEO",
"link": null
},
{
"name": "start_time",
"type": "FLOAT",
"widget": {
"name": "start_time"
},
"link": null
},
{
"name": "duration",
"type": "FLOAT",
"widget": {
"name": "duration"
},
"link": null
},
{
"name": "resolution",
"type": "INT",
"widget": {
"name": "resolution"
},
"link": null
},
{
"name": "resize_method",
"type": "COMBO",
"widget": {
"name": "resize_method"
},
"link": null
},
{
"label": "output_type",
"name": "output",
"type": "COMFY_DYNAMICCOMBO_V3",
"widget": {
"name": "output"
},
"link": null
},
{
"label": "normalization",
"name": "output.normalization",
"type": "COMBO",
"widget": {
"name": "output.normalization"
},
"link": null
},
{
"name": "output.apply_sky_clip",
"type": "BOOLEAN",
"widget": {
"name": "output.apply_sky_clip"
},
"link": null
},
{
"name": "model_name",
"type": "COMBO",
"widget": {
"name": "model_name"
},
"link": null
}
],
"outputs": [
{
"localized_name": "IMAGE",
"name": "IMAGE",
"type": "IMAGE",
"links": []
},
{
"name": "audio",
"type": "AUDIO",
"links": []
},
{
"name": "fps",
"type": "FLOAT",
"links": []
}
],
"properties": {
"proxyWidgets": [
[
"96",
"start_time"
],
[
"96",
"duration"
],
[
"93",
"resolution"
],
[
"93",
"resize_method"
],
[
"92",
"output"
],
[
"92",
"output.normalization"
],
[
"92",
"output.apply_sky_clip"
],
[
"94",
"model_name"
]
],
"cnr_id": "comfy-core",
"ver": "0.24.0"
},
"widgets_values": [],
"title": "Video Depth Estimation (Depth Anything 3)"
}
],
"links": [],
"version": 0.4,
"definitions": {
"subgraphs": [
{
"id": "253ec5ca-8333-4ddf-a036-9fc0923651b9",
"version": 1,
"state": {
"lastGroupId": 4,
"lastNodeId": 97,
"lastLinkId": 129,
"lastRerouteId": 0
},
"revision": 2,
"config": {},
"name": "Video Depth Estimation (Depth Anything 3)",
"inputNode": {
"id": -10,
"bounding": [
-230,
130,
167.912109375,
228
]
},
"outputNode": {
"id": -20,
"bounding": [
1520,
140,
128,
108
]
},
"inputs": [
{
"id": "698c28c6-cf92-4039-8b39-f3062868ea7c",
"name": "video",
"type": "VIDEO",
"linkIds": [
119
],
"pos": [
-86.087890625,
154
]
},
{
"id": "97a1f63e-1585-4a40-9dec-e2700120d84a",
"name": "start_time",
"type": "FLOAT",
"linkIds": [
121
],
"pos": [
-86.087890625,
174
]
},
{
"id": "4dbbd3b3-c5ee-4a56-a0d3-3268d3b2fd64",
"name": "duration",
"type": "FLOAT",
"linkIds": [
122
],
"pos": [
-86.087890625,
194
]
},
{
"id": "16f55101-f99d-4c0c-bebf-c3b31c54f13e",
"name": "resolution",
"type": "INT",
"linkIds": [
124
],
"pos": [
-86.087890625,
214
]
},
{
"id": "d9cd7693-4bb3-4ed7-9a75-276b997abcd9",
"name": "resize_method",
"type": "COMBO",
"linkIds": [
125
],
"pos": [
-86.087890625,
234
]
},
{
"id": "a6e90532-323b-462e-ba9c-1672384d5b31",
"name": "output",
"type": "COMFY_DYNAMICCOMBO_V3",
"linkIds": [
126
],
"label": "output_type",
"pos": [
-86.087890625,
254
]
},
{
"id": "69e6aeef-437d-4fde-b2fc-d5ab9369238d",
"name": "output.normalization",
"type": "COMBO",
"linkIds": [
127
],
"label": "normalization",
"pos": [
-86.087890625,
274
]
},
{
"id": "73206f72-f89a-4698-885e-5d9277df2998",
"name": "output.apply_sky_clip",
"type": "BOOLEAN",
"linkIds": [
128
],
"pos": [
-86.087890625,
294
]
},
{
"id": "dddbc7fc-9431-448a-9ed3-9aa62404288b",
"name": "model_name",
"type": "COMBO",
"linkIds": [
129
],
"pos": [
-86.087890625,
314
]
}
],
"outputs": [
{
"id": "478ab537-63bc-4d74-a9f0-c975f550880f",
"name": "IMAGE",
"type": "IMAGE",
"linkIds": [
7
],
"localized_name": "IMAGE",
"pos": [
1544,
164
]
},
{
"id": "cdaf037e-79bc-4a94-b06c-0fd32e76f615",
"name": "audio",
"type": "AUDIO",
"linkIds": [
112
],
"pos": [
1544,
184
]
},
{
"id": "4c0e5484-d193-49c7-b107-92619628880a",
"name": "fps",
"type": "FLOAT",
"linkIds": [
113
],
"pos": [
1544,
204
]
}
],
"widgets": [],
"nodes": [
{
"id": 92,
"type": "DA3Render",
"pos": [
740,
230
],
"size": [
380,
130
],
"flags": {},
"order": 0,
"mode": 0,
"inputs": [
{
"localized_name": "da3_geometry",
"name": "da3_geometry",
"type": "DA3_GEOMETRY",
"link": 12
},
{
"localized_name": "output",
"name": "output",
"type": "COMFY_DYNAMICCOMBO_V3",
"widget": {
"name": "output"
},
"link": 126
},
{
"localized_name": "output.normalization",
"name": "output.normalization",
"type": "COMBO",
"widget": {
"name": "output.normalization"
},
"link": 127
},
{
"localized_name": "output.apply_sky_clip",
"name": "output.apply_sky_clip",
"type": "BOOLEAN",
"widget": {
"name": "output.apply_sky_clip"
},
"link": 128
},
{
"name": "geometry",
"type": "DA3_GEOMETRY",
"link": null
}
],
"outputs": [
{
"localized_name": "IMAGE",
"name": "IMAGE",
"type": "IMAGE",
"slot_index": 0,
"links": [
7
]
}
],
"properties": {
"Node name for S&R": "DA3Render",
"cnr_id": "comfy-core",
"ver": "0.19.0"
},
"widgets_values": [
"depth",
"v2_style",
false
]
},
{
"id": 93,
"type": "DA3Inference",
"pos": [
740,
-30
],
"size": [
390,
130
],
"flags": {},
"order": 1,
"mode": 0,
"inputs": [
{
"localized_name": "da3_model",
"name": "da3_model",
"type": "DA3_MODEL",
"link": 107
},
{
"localized_name": "image",
"name": "image",
"type": "IMAGE",
"link": 111
},
{
"localized_name": "resolution",
"name": "resolution",
"type": "INT",
"widget": {
"name": "resolution"
},
"link": 124
},
{
"localized_name": "resize_method",
"name": "resize_method",
"type": "COMBO",
"widget": {
"name": "resize_method"
},
"link": 125
},
{
"localized_name": "mode",
"name": "mode",
"type": "COMFY_DYNAMICCOMBO_V3",
"widget": {
"name": "mode"
},
"link": null
}
],
"outputs": [
{
"localized_name": "da3_geometry",
"name": "da3_geometry",
"type": "DA3_GEOMETRY",
"slot_index": 0,
"links": [
12
]
}
],
"properties": {
"Node name for S&R": "DA3Inference",
"cnr_id": "comfy-core",
"ver": "0.19.0"
},
"widgets_values": [
504,
"lower_bound_resize",
"mono"
]
},
{
"id": 94,
"type": "LoadDA3Model",
"pos": [
50,
410
],
"size": [
400,
140
],
"flags": {},
"order": 2,
"mode": 0,
"inputs": [
{
"localized_name": "model_name",
"name": "model_name",
"type": "COMBO",
"widget": {
"name": "model_name"
},
"link": 129
},
{
"localized_name": "weight_dtype",
"name": "weight_dtype",
"type": "COMBO",
"widget": {
"name": "weight_dtype"
},
"link": null
}
],
"outputs": [
{
"localized_name": "DA3_MODEL",
"name": "DA3_MODEL",
"type": "DA3_MODEL",
"links": [
107
]
}
],
"properties": {
"Node name for S&R": "LoadDA3Model",
"cnr_id": "comfy-core",
"ver": "0.24.0",
"models": [
{
"name": "depth_anything_3_mono_large.safetensors",
"url": "https://huggingface.co/Comfy-Org/Depth-Anything-3/resolve/main/geometry_estimation/depth_anything_3_mono_large.safetensors",
"directory": "geometry_estimation"
}
]
},
"widgets_values": [
"depth_anything_3_mono_large.safetensors",
"default"
]
},
{
"id": 95,
"type": "GetVideoComponents",
"pos": [
70,
-140
],
"size": [
260,
120
],
"flags": {},
"order": 3,
"mode": 0,
"inputs": [
{
"localized_name": "video",
"name": "video",
"type": "VIDEO",
"link": 120
}
],
"outputs": [
{
"localized_name": "images",
"name": "images",
"type": "IMAGE",
"links": [
111
]
},
{
"localized_name": "audio",
"name": "audio",
"type": "AUDIO",
"links": [
112
]
},
{
"localized_name": "fps",
"name": "fps",
"type": "FLOAT",
"links": [
113
]
},
{
"localized_name": "bit_depth",
"name": "bit_depth",
"type": "INT",
"links": null
}
],
"properties": {
"Node name for S&R": "GetVideoComponents",
"cnr_id": "comfy-core",
"ver": "0.24.0"
}
},
{
"id": 96,
"type": "Video Slice",
"pos": [
70,
-360
],
"size": [
270,
170
],
"flags": {},
"order": 4,
"mode": 0,
"inputs": [
{
"localized_name": "video",
"name": "video",
"type": "VIDEO",
"link": 119
},
{
"localized_name": "start_time",
"name": "start_time",
"type": "FLOAT",
"widget": {
"name": "start_time"
},
"link": 121
},
{
"localized_name": "duration",
"name": "duration",
"type": "FLOAT",
"widget": {
"name": "duration"
},
"link": 122
},
{
"localized_name": "strict_duration",
"name": "strict_duration",
"type": "BOOLEAN",
"widget": {
"name": "strict_duration"
},
"link": null
}
],
"outputs": [
{
"localized_name": "VIDEO",
"name": "VIDEO",
"type": "VIDEO",
"links": [
120
]
}
],
"properties": {
"Node name for S&R": "Video Slice",
"cnr_id": "comfy-core",
"ver": "0.24.0"
},
"widgets_values": [
0,
5,
false
]
}
],
"groups": [],
"links": [
{
"id": 12,
"origin_id": 93,
"origin_slot": 0,
"target_id": 92,
"target_slot": 0,
"type": "DA3_GEOMETRY"
},
{
"id": 7,
"origin_id": 92,
"origin_slot": 0,
"target_id": -20,
"target_slot": 0,
"type": "IMAGE"
},
{
"id": 107,
"origin_id": 94,
"origin_slot": 0,
"target_id": 93,
"target_slot": 0,
"type": "DA3_MODEL"
},
{
"id": 111,
"origin_id": 95,
"origin_slot": 0,
"target_id": 93,
"target_slot": 1,
"type": "IMAGE"
},
{
"id": 112,
"origin_id": 95,
"origin_slot": 1,
"target_id": -20,
"target_slot": 1,
"type": "AUDIO"
},
{
"id": 113,
"origin_id": 95,
"origin_slot": 2,
"target_id": -20,
"target_slot": 2,
"type": "FLOAT"
},
{
"id": 119,
"origin_id": -10,
"origin_slot": 0,
"target_id": 96,
"target_slot": 0,
"type": "VIDEO"
},
{
"id": 120,
"origin_id": 96,
"origin_slot": 0,
"target_id": 95,
"target_slot": 0,
"type": "VIDEO"
},
{
"id": 121,
"origin_id": -10,
"origin_slot": 1,
"target_id": 96,
"target_slot": 1,
"type": "FLOAT"
},
{
"id": 122,
"origin_id": -10,
"origin_slot": 2,
"target_id": 96,
"target_slot": 2,
"type": "FLOAT"
},
{
"id": 124,
"origin_id": -10,
"origin_slot": 3,
"target_id": 93,
"target_slot": 2,
"type": "INT"
},
{
"id": 125,
"origin_id": -10,
"origin_slot": 4,
"target_id": 93,
"target_slot": 3,
"type": "COMBO"
},
{
"id": 126,
"origin_id": -10,
"origin_slot": 5,
"target_id": 92,
"target_slot": 1,
"type": "COMFY_DYNAMICCOMBO_V3"
},
{
"id": 127,
"origin_id": -10,
"origin_slot": 6,
"target_id": 92,
"target_slot": 2,
"type": "COMBO"
},
{
"id": 128,
"origin_id": -10,
"origin_slot": 7,
"target_id": 92,
"target_slot": 3,
"type": "BOOLEAN"
},
{
"id": 129,
"origin_id": -10,
"origin_slot": 8,
"target_id": 94,
"target_slot": 0,
"type": "COMBO"
}
],
"extra": {},
"category": "Conditioning & Preprocessors/Depth",
"description": "This subgraph processes a video input through Depth Anything 3 to produce temporally consistent depth maps for each frame, outputting a depth video. It is ideal for video content requiring spatial geometry estimation, such as 3D reconstruction, SLAM, or novel view synthesis from moving cameras. The model uses a plain transformer backbone trained with a depth-ray representation, supporting any number of views without requiring known camera poses."
}
]
},
"extra": {
"BlueprintDescription": "This subgraph processes a video input through Depth Anything 3 to produce temporally consistent depth maps for each frame, outputting a depth video. It is ideal for video content requiring spatial geometry estimation, such as 3D reconstruction, SLAM, or novel view synthesis from moving cameras. The model uses a plain transformer backbone trained with a depth-ray representation, supporting any number of views without requiring known camera poses."
}
}

File diff suppressed because it is too large Load Diff

View File

@ -1665,7 +1665,7 @@ class SCAILWanModel(WanModel):
# embeddings # embeddings
x = self.patch_embedding(x.float()).to(x.dtype) x = self.patch_embedding(x.float()).to(x.dtype)
if ref_mask_latents is not None: # SCAIL-2 additive mask stream if ref_mask_latents is not None: # SCAIL-2 additive mask stream (one identity mask frame per reference, then video)
x = x + self.patch_embedding_mask(ref_mask_latents.float()).to(x.dtype) x = x + self.patch_embedding_mask(ref_mask_latents.float()).to(x.dtype)
grid_sizes = x.shape[2:] grid_sizes = x.shape[2:]
transformer_options["grid_sizes"] = grid_sizes transformer_options["grid_sizes"] = grid_sizes
@ -1728,22 +1728,25 @@ class SCAILWanModel(WanModel):
# ref_mask_flag is a scalar bool (CONDConstant, SCAIL-2 only). False => replacement mode, # ref_mask_flag is a scalar bool (CONDConstant, SCAIL-2 only). False => replacement mode,
# which places ref/pose via H/W rope shifts instead of the animation-mode temporal offset. # which places ref/pose via H/W rope shifts instead of the animation-mode temporal offset.
# reference_latent may stack several frames: the last is the primary reference adjacent to the video, the earlier frames are additional references.
def rope_encode(self, t, h, w, t_start=0, steps_t=None, steps_h=None, steps_w=None, device=None, dtype=None, pose_latents=None, reference_latent=None, ref_mask_flag=None, transformer_options={}): def rope_encode(self, t, h, w, t_start=0, steps_t=None, steps_h=None, steps_w=None, device=None, dtype=None, pose_latents=None, reference_latent=None, ref_mask_flag=None, transformer_options={}):
ref_t_patches = 0
if reference_latent is not None:
ref_t_patches = (reference_latent.shape[2] + (self.patch_size[0] // 2)) // self.patch_size[0]
if ref_mask_flag is not None and not bool(ref_mask_flag): if ref_mask_flag is not None and not bool(ref_mask_flag):
REF_ROPE_H = 120.0 REF_ROPE_H = 120.0
POSE_ROPE_W = 120.0 POSE_ROPE_W = 120.0
ref_t_patches = 0
if reference_latent is not None:
ref_t_patches = (reference_latent.shape[2] + (self.patch_size[0] // 2)) // self.patch_size[0]
main_t_patches = t - ref_t_patches main_t_patches = t - ref_t_patches
video_t_start = max(ref_t_patches - 1, 0)
parts = [] parts = []
if ref_t_patches > 0: if ref_t_patches > 0:
ref_tf = {"rope_options": {"shift_y": REF_ROPE_H, "shift_x": 0.0, "scale_y": 1.0, "scale_x": 1.0}} ref_tf = {"rope_options": {"shift_y": REF_ROPE_H, "shift_x": 0.0, "scale_y": 1.0, "scale_x": 1.0}}
parts.append(super().rope_encode(ref_t_patches, h, w, t_start=0, device=device, dtype=dtype, transformer_options=ref_tf)) parts.append(super().rope_encode(ref_t_patches, h, w, t_start=0, device=device, dtype=dtype, transformer_options=ref_tf))
if main_t_patches > 0: if main_t_patches > 0:
parts.append(super().rope_encode(main_t_patches, h, w, t_start=0, device=device, dtype=dtype, transformer_options=transformer_options)) parts.append(super().rope_encode(main_t_patches, h, w, t_start=video_t_start, device=device, dtype=dtype, transformer_options=transformer_options))
if pose_latents is not None: if pose_latents is not None:
F_pose, H_pose, W_pose = pose_latents.shape[-3], pose_latents.shape[-2], pose_latents.shape[-1] F_pose, H_pose, W_pose = pose_latents.shape[-3], pose_latents.shape[-2], pose_latents.shape[-1]
@ -1752,7 +1755,7 @@ class SCAILWanModel(WanModel):
h_shift = (h_scale - 1) / 2 h_shift = (h_scale - 1) / 2
w_shift = (w_scale - 1) / 2 w_shift = (w_scale - 1) / 2
pose_tf = {"rope_options": {"shift_y": h_shift, "shift_x": POSE_ROPE_W + w_shift, "scale_y": h_scale, "scale_x": w_scale}} pose_tf = {"rope_options": {"shift_y": h_shift, "shift_x": POSE_ROPE_W + w_shift, "scale_y": h_scale, "scale_x": w_scale}}
parts.append(super().rope_encode(F_pose, H_pose, W_pose, t_start=0, device=device, dtype=dtype, transformer_options=pose_tf)) parts.append(super().rope_encode(F_pose, H_pose, W_pose, t_start=video_t_start, device=device, dtype=dtype, transformer_options=pose_tf))
return torch.cat(parts, dim=1) return torch.cat(parts, dim=1)
@ -1761,10 +1764,6 @@ class SCAILWanModel(WanModel):
if pose_latents is None: if pose_latents is None:
return main_freqs return main_freqs
ref_t_patches = 0
if reference_latent is not None:
ref_t_patches = (reference_latent.shape[2] + (self.patch_size[0] // 2)) // self.patch_size[0]
F_pose, H_pose, W_pose = pose_latents.shape[-3], pose_latents.shape[-2], pose_latents.shape[-1] F_pose, H_pose, W_pose = pose_latents.shape[-3], pose_latents.shape[-2], pose_latents.shape[-1]
# if pose is at half resolution, scale_y/scale_x=2 stretches the position range to cover the same RoPE extent as the main frames # if pose is at half resolution, scale_y/scale_x=2 stretches the position range to cover the same RoPE extent as the main frames

View File

@ -1747,10 +1747,14 @@ class WAN21_SCAIL(WAN21):
reference_latents = kwargs.get("reference_latents", None) reference_latents = kwargs.get("reference_latents", None)
if reference_latents is not None: if reference_latents is not None:
ref_latent = self.process_latent_in(reference_latents[-1]) # SCAIL-2 multi-reference: reference_latents[0] is the primary ref, [1:] are additional
ref_mask = torch.ones_like(ref_latent[:, :4]) # references. Stack as [additional..., primary] so the primary stays adjacent to the video.
ref_latent = torch.cat([ref_latent, ref_mask], dim=1) ordered = list(reference_latents[1:]) + list(reference_latents[:1])
out['reference_latent'] = comfy.conds.CONDRegular(ref_latent) stacked = []
for lat in ordered:
lat = self.process_latent_in(lat)
stacked.append(torch.cat([lat, torch.ones_like(lat[:, :4])], dim=1))
out['reference_latent'] = comfy.conds.CONDRegular(torch.cat(stacked, dim=2))
pose_latents = kwargs.get("pose_video_latent", None) pose_latents = kwargs.get("pose_video_latent", None)
if pose_latents is not None: if pose_latents is not None:
@ -1792,6 +1796,7 @@ class WAN21_SCAIL2(WAN21_SCAIL):
if driving_mask_28ch is not None: if driving_mask_28ch is not None:
out['sam_latents'] = comfy.conds.CONDRegular(driving_mask_28ch.movedim(1, 2).contiguous()) out['sam_latents'] = comfy.conds.CONDRegular(driving_mask_28ch.movedim(1, 2).contiguous())
# ref_mask_28ch holds one identity mask per stacked reference frame (additional refs first, then the primary ref), followed by zeros over the video frames.
ref_mask_28ch = kwargs.get("ref_mask_28ch", None) ref_mask_28ch = kwargs.get("ref_mask_28ch", None)
if ref_mask_28ch is not None: if ref_mask_28ch is not None:
out['ref_mask_latents'] = comfy.conds.CONDRegular(ref_mask_28ch.movedim(1, 2).contiguous()) out['ref_mask_latents'] = comfy.conds.CONDRegular(ref_mask_28ch.movedim(1, 2).contiguous())
@ -1819,10 +1824,11 @@ class WAN21_SCAIL2(WAN21_SCAIL):
# Return sliced view omitting retain_index_list # Return sliced view omitting retain_index_list
return comfy.context_windows.slice_cond(cond_value, window, x_in, device, temporal_dim=2, temporal_offset=0) return comfy.context_windows.slice_cond(cond_value, window, x_in, device, temporal_dim=2, temporal_offset=0)
if cond_key == "ref_mask_latents" and hasattr(cond_value, "cond") and isinstance(cond_value.cond, torch.Tensor): if cond_key == "ref_mask_latents" and hasattr(cond_value, "cond") and isinstance(cond_value.cond, torch.Tensor):
# The ref mask is just a single frame padded with frames of zeros, so just grab the first frames for all windows # The ref mask is N leading ref frames padded with frames of zeros, so just grab the first frames for all windows
full_ref_mask = cond_value.cond full_ref_mask = cond_value.cond
video_frame_count = x_in.shape[2] video_frame_count = x_in.shape[2]
if full_ref_mask.shape[2] != video_frame_count + 1: ref_frame_count = full_ref_mask.shape[2] - video_frame_count
if ref_frame_count < 1:
return None return None
window_length = len(window.index_list) window_length = len(window.index_list)
@ -1831,7 +1837,7 @@ class WAN21_SCAIL2(WAN21_SCAIL):
if anchor_index is not None and anchor_index >= 0: if anchor_index is not None and anchor_index >= 0:
window_length += 1 window_length += 1
window_ref_mask = full_ref_mask[:, :, :window_length + 1].to(device) window_ref_mask = full_ref_mask[:, :, :window_length + ref_frame_count].to(device)
return cond_value._copy_with(window_ref_mask) return cond_value._copy_with(window_ref_mask)
return super().resize_cond_for_context_window(cond_key, cond_value, window, x_in, device, retain_index_list=retain_index_list) return super().resize_cond_for_context_window(cond_key, cond_value, window, x_in, device, retain_index_list=retain_index_list)

View File

@ -67,6 +67,7 @@ import comfy.text_encoders.anima
import comfy.text_encoders.ace15 import comfy.text_encoders.ace15
import comfy.text_encoders.longcat_image import comfy.text_encoders.longcat_image
import comfy.text_encoders.qwen35 import comfy.text_encoders.qwen35
import comfy.text_encoders.qwen3vl
import comfy.text_encoders.ernie import comfy.text_encoders.ernie
import comfy.text_encoders.gemma4 import comfy.text_encoders.gemma4
import comfy.text_encoders.cogvideo import comfy.text_encoders.cogvideo
@ -1353,6 +1354,8 @@ class TEModel(Enum):
GEMMA_4_31B = 31 GEMMA_4_31B = 31
T5_GEMMA = 32 T5_GEMMA = 32
GPT_OSS_20B = 33 GPT_OSS_20B = 33
QWEN3VL_4B = 34
QWEN3VL_8B = 35
def detect_te_model(sd): def detect_te_model(sd):
@ -1414,6 +1417,8 @@ def detect_te_model(sd):
if weight.shape[0] == 5120: if weight.shape[0] == 5120:
return TEModel.QWEN35_27B return TEModel.QWEN35_27B
return TEModel.QWEN35_2B return TEModel.QWEN35_2B
if "model.visual.deepstack_merger_list.0.norm.weight" in sd: # DeepStack is unique to Qwen3-VL
return TEModel.QWEN3VL_4B if sd["model.visual.merger.linear_fc2.weight"].shape[0] == 2560 else TEModel.QWEN3VL_8B
if "model.layers.0.post_attention_layernorm.weight" in sd: if "model.layers.0.post_attention_layernorm.weight" in sd:
weight = sd['model.layers.0.post_attention_layernorm.weight'] weight = sd['model.layers.0.post_attention_layernorm.weight']
if 'model.layers.0.self_attn.q_norm.weight' in sd: if 'model.layers.0.self_attn.q_norm.weight' in sd:
@ -1612,6 +1617,20 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip
qwen35_type = {TEModel.QWEN35_08B: "qwen35_08b", TEModel.QWEN35_2B: "qwen35_2b", TEModel.QWEN35_4B: "qwen35_4b", TEModel.QWEN35_9B: "qwen35_9b", TEModel.QWEN35_27B: "qwen35_27b"}[te_model] qwen35_type = {TEModel.QWEN35_08B: "qwen35_08b", TEModel.QWEN35_2B: "qwen35_2b", TEModel.QWEN35_4B: "qwen35_4b", TEModel.QWEN35_9B: "qwen35_9b", TEModel.QWEN35_27B: "qwen35_27b"}[te_model]
clip_target.clip = comfy.text_encoders.qwen35.te(**llama_detect(clip_data), model_type=qwen35_type) clip_target.clip = comfy.text_encoders.qwen35.te(**llama_detect(clip_data), model_type=qwen35_type)
clip_target.tokenizer = comfy.text_encoders.qwen35.tokenizer(model_type=qwen35_type) clip_target.tokenizer = comfy.text_encoders.qwen35.tokenizer(model_type=qwen35_type)
elif te_model in (TEModel.QWEN3VL_4B, TEModel.QWEN3VL_8B):
if clip_type == CLIPType.IDEOGRAM4 and te_model == TEModel.QWEN3VL_8B: # Ideogram4 reuses the full Qwen3-VL-8B (13-layer tap for conditioning + multimodal generate).
clip_data[0] = comfy.utils.state_dict_prefix_replace(clip_data[0], {"model.language_model.": "model.", "model.visual.": "visual.", "lm_head.": "model.lm_head."})
clip_target.clip = comfy.text_encoders.ideogram4.te_qwen3vl(**llama_detect(clip_data))
clip_target.tokenizer = comfy.text_encoders.ideogram4.Ideogram4Qwen3VLTokenizer
elif clip_type in (CLIPType.FLUX, CLIPType.FLUX2): # Flux2 Klein reuses the Qwen3-VL LM (3-layer tap -> 12288); visual unused.
klein_model_type = "qwen3_8b" if te_model == TEModel.QWEN3VL_8B else "qwen3_4b"
clip_target.clip = comfy.text_encoders.flux.klein_te(**llama_detect(clip_data), model_type=klein_model_type)
clip_target.tokenizer = comfy.text_encoders.flux.KleinTokenizer8B if te_model == TEModel.QWEN3VL_8B else comfy.text_encoders.flux.KleinTokenizer
else:
clip_data[0] = comfy.utils.state_dict_prefix_replace(clip_data[0], {"model.language_model.": "model.", "model.visual.": "visual.", "lm_head.": "model.lm_head."})
qwen3vl_type = {TEModel.QWEN3VL_4B: "qwen3vl_4b", TEModel.QWEN3VL_8B: "qwen3vl_8b"}[te_model]
clip_target.clip = comfy.text_encoders.qwen3vl.te(**llama_detect(clip_data), model_type=qwen3vl_type)
clip_target.tokenizer = comfy.text_encoders.qwen3vl.tokenizer(model_type=qwen3vl_type)
elif te_model == TEModel.QWEN3_06B: elif te_model == TEModel.QWEN3_06B:
clip_target.clip = comfy.text_encoders.anima.te(**llama_detect(clip_data)) clip_target.clip = comfy.text_encoders.anima.te(**llama_detect(clip_data))
clip_target.tokenizer = comfy.text_encoders.anima.AnimaTokenizer clip_target.tokenizer = comfy.text_encoders.anima.AnimaTokenizer

View File

@ -9,6 +9,7 @@ import os
from transformers import Qwen2Tokenizer from transformers import Qwen2Tokenizer
import comfy.text_encoders.llama import comfy.text_encoders.llama
import comfy.text_encoders.qwen3vl
from comfy import sd1_clip from comfy import sd1_clip
# Reference taps outputs of layers (0,3,...,35); comfy captures layer inputs, offset by +1. # Reference taps outputs of layers (0,3,...,35); comfy captures layer inputs, offset by +1.
@ -77,3 +78,43 @@ def te(dtype_llama=None, llama_quantization_metadata=None):
model_options["quantization_metadata"] = llama_quantization_metadata model_options["quantization_metadata"] = llama_quantization_metadata
super().__init__(device=device, dtype=dtype, model_options=model_options) super().__init__(device=device, dtype=dtype, model_options=model_options)
return Ideogram4TEModel_ return Ideogram4TEModel_
# Full Qwen3-VL-8B variant with vision
class Ideogram4Qwen3VLClipModel(comfy.text_encoders.qwen3vl.Qwen3VLClipModel):
    """Qwen3-VL clip model pinned to the 8B variant for Ideogram 4.

    Always forwards ``layer=IDEOGRAM4_TAP_LAYERS``, ``layer_idx=None`` and
    ``model_type="qwen3vl_8b"`` to the parent constructor; the remaining
    arguments are passed through unchanged.
    """

    def __init__(self, device="cpu", dtype=None, attention_mask=True, model_options={}):
        super().__init__(
            device=device,
            dtype=dtype,
            layer=IDEOGRAM4_TAP_LAYERS,
            layer_idx=None,
            attention_mask=attention_mask,
            model_options=model_options,
            model_type="qwen3vl_8b",
        )
class Ideogram4Qwen3VLTEModel(sd1_clip.SD1ClipModel):
    """Text-encoder wrapper that flattens the tapped layers into one feature axis."""

    def __init__(self, device="cpu", dtype=None, model_options={}):
        super().__init__(
            device=device,
            dtype=dtype,
            name="qwen3vl_8b",
            clip_model=Ideogram4Qwen3VLClipModel,
            model_options=model_options,
        )

    def encode_token_weights(self, token_weight_pairs):
        """Encode tokens and merge the tap axis into the hidden axis.

        The parent result is unpacked as ``(batch, taps, seq, hidden)`` and
        returned as ``(batch, seq, hidden * taps)``. ``pooled`` and ``extra``
        are passed through untouched.
        """
        stacked, pooled, extra = super().encode_token_weights(token_weight_pairs)
        # (B, n_taps=13, seq, 4096), ascending layer order.
        batch, taps, seq_len, hidden = stacked.shape
        # Move taps last so that, per token, the taps of each hidden unit sit
        # next to each other: (B, seq, 4096*13 = 53248).
        per_token = stacked.permute(0, 2, 3, 1)
        flattened = per_token.reshape(batch, seq_len, hidden * taps)
        return flattened, pooled, extra
class Ideogram4Qwen3VLTokenizer(comfy.text_encoders.qwen3vl.Qwen3VLTokenizer):
    """Qwen3-VL tokenizer fixed to the 8B variant, with ``thinking`` defaulting to True."""

    def __init__(self, embedding_directory=None, tokenizer_data={}):
        super().__init__(
            embedding_directory=embedding_directory,
            tokenizer_data=tokenizer_data,
            model_type="qwen3vl_8b",
        )

    def tokenize_with_weights(self, text, return_word_ids=False, llama_template=None, images=[], prevent_empty_text=False, thinking=True, **kwargs):
        # Ideogram 4 conditions on the no-think template; default thinking=True drops the empty think block qwen3vl adds.
        forwarded = dict(
            return_word_ids=return_word_ids,
            llama_template=llama_template,
            images=images,
            prevent_empty_text=prevent_empty_text,
            thinking=thinking,
        )
        return super().tokenize_with_weights(text, **forwarded, **kwargs)
def te_qwen3vl(dtype_llama=None, llama_quantization_metadata=None):
    """Build an ``Ideogram4Qwen3VLTEModel`` subclass with load-time overrides baked in.

    dtype_llama: when not None, replaces the ``dtype`` given to the constructor.
    llama_quantization_metadata: when not None, stored under
        ``model_options["quantization_metadata"]`` on a copy of the options.
    Returns the class itself, not an instance.
    """
    class Ideogram4Qwen3VLTEModel_(Ideogram4Qwen3VLTEModel):
        def __init__(self, device="cpu", dtype=None, model_options={}):
            chosen_dtype = dtype if dtype_llama is None else dtype_llama
            chosen_options = model_options
            if llama_quantization_metadata is not None:
                # Work on a copy so the caller's dict (or the shared default) is never mutated.
                chosen_options = {**model_options, "quantization_metadata": llama_quantization_metadata}
            super().__init__(device=device, dtype=chosen_dtype, model_options=chosen_options)

    return Ideogram4Qwen3VLTEModel_

View File

@ -251,6 +251,19 @@ class Qwen3_8BConfig:
lm_head: bool = True lm_head: bool = True
stop_tokens = [151643, 151645] stop_tokens = [151643, 151645]
@dataclass
class Qwen3VL_8BConfig(Qwen3_8BConfig):
    """Qwen3-VL 8B text config: the Qwen3 8B config with MRoPE-related overrides."""

    max_position_embeddings: int = 262_144
    rope_theta: float = 5_000_000.0
    # Left unannotated, so this class does not declare these two as dataclass fields;
    # they are set as plain class attributes.
    rope_dims = [24, 20, 20]
    interleaved_mrope = True
@dataclass
class Qwen3VL_4BConfig(Qwen3VL_8BConfig):
    """Qwen3-VL 4B text config: the 8B config with narrower layers and no separate lm_head."""

    hidden_size: int = 2560
    intermediate_size: int = 9728
    # 4B ties word embeddings
    lm_head: bool = False
@dataclass @dataclass
class Ovis25_2BConfig: class Ovis25_2BConfig:
vocab_size: int = 151936 vocab_size: int = 151936
@ -703,7 +716,8 @@ class Llama2_(nn.Module):
interleaved_mrope=getattr(self.config, "interleaved_mrope", False), interleaved_mrope=getattr(self.config, "interleaved_mrope", False),
device=device) device=device)
def forward(self, x, attention_mask=None, embeds=None, num_tokens=None, intermediate_output=None, final_layer_norm_intermediate=True, dtype=None, position_ids=None, embeds_info=[], past_key_values=None, input_ids=None): def forward(self, x, attention_mask=None, embeds=None, num_tokens=None, intermediate_output=None, final_layer_norm_intermediate=True,
dtype=None, position_ids=None, embeds_info=[], past_key_values=None, input_ids=None,deepstack_embeds=None, visual_pos_masks=None):
if embeds is not None: if embeds is not None:
x = embeds x = embeds
else: else:
@ -767,6 +781,10 @@ class Llama2_(nn.Module):
if current_kv is not None: if current_kv is not None:
next_key_values.append(current_kv) next_key_values.append(current_kv)
# DeepStack: add per-layer visual features into the first len(deepstack_embeds) decoder layers at image positions (Qwen3-VL)
if deepstack_embeds is not None and i < len(deepstack_embeds):
x[visual_pos_masks] = x[visual_pos_masks] + deepstack_embeds[i].to(x)
if i == intermediate_output: if i == intermediate_output:
intermediate = x.clone() intermediate = x.clone()
@ -860,7 +878,7 @@ class BaseGenerate:
torch.empty([batch, model_config.num_key_value_heads, max_cache_len, model_config.head_dim], device=device, dtype=execution_dtype), 0)) torch.empty([batch, model_config.num_key_value_heads, max_cache_len, model_config.head_dim], device=device, dtype=execution_dtype), 0))
return past_key_values return past_key_values
def generate(self, embeds=None, do_sample=True, max_length=256, temperature=1.0, top_k=50, top_p=0.9, min_p=0.0, repetition_penalty=1.0, seed=42, stop_tokens=None, initial_tokens=[], execution_dtype=None, min_tokens=0, presence_penalty=0.0, initial_input_ids=None): def generate(self, embeds=None, do_sample=True, max_length=256, temperature=1.0, top_k=50, top_p=0.9, min_p=0.0, repetition_penalty=1.0, seed=42, stop_tokens=None, initial_tokens=[], execution_dtype=None, min_tokens=0, presence_penalty=0.0, initial_input_ids=None, position_ids=None, deepstack_embeds=None, visual_pos_masks=None):
device = embeds.device device = embeds.device
if stop_tokens is None: if stop_tokens is None:
@ -884,10 +902,18 @@ class BaseGenerate:
generated_token_ids = [] generated_token_ids = []
pbar = comfy.utils.ProgressBar(max_length) pbar = comfy.utils.ProgressBar(max_length)
# MRoPE: prefill uses explicit 3D position_ids, decode continues from the last position
next_pos = int(position_ids[:, -1].max()) + 1 if position_ids is not None else None
# Generation loop # Generation loop
current_input_ids = initial_input_ids current_input_ids = initial_input_ids
for step in tqdm(range(max_length), desc="Generating tokens"): for step in tqdm(range(max_length), desc="Generating tokens"):
x, _, past_key_values = self.model.forward(None, embeds=embeds, attention_mask=None, past_key_values=past_key_values, input_ids=current_input_ids) # DeepStack visual features are injected on the prefill only; gemma4's forward lacks these kwargs.
extra = {}
if step == 0 and deepstack_embeds is not None:
extra["deepstack_embeds"] = deepstack_embeds
extra["visual_pos_masks"] = visual_pos_masks
x, _, past_key_values = self.model.forward(None, embeds=embeds, attention_mask=None, past_key_values=past_key_values, input_ids=current_input_ids, position_ids=position_ids, **extra)
logits = self.logits(x)[:, -1] logits = self.logits(x)[:, -1]
next_token = self.sample_token(logits, temperature, top_k, top_p, min_p, repetition_penalty, initial_tokens + generated_token_ids, generator, do_sample=do_sample, presence_penalty=presence_penalty) next_token = self.sample_token(logits, temperature, top_k, top_p, min_p, repetition_penalty, initial_tokens + generated_token_ids, generator, do_sample=do_sample, presence_penalty=presence_penalty)
token_id = next_token[0].item() token_id = next_token[0].item()
@ -895,6 +921,9 @@ class BaseGenerate:
embeds = self.model.embed_tokens(next_token).to(execution_dtype) embeds = self.model.embed_tokens(next_token).to(execution_dtype)
current_input_ids = next_token if initial_input_ids is not None else None current_input_ids = next_token if initial_input_ids is not None else None
if next_pos is not None: # advance MRoPE position for the next (decode) step
position_ids = torch.tensor([[next_pos]], device=device)
next_pos += 1
pbar.update(1) pbar.update(1)
if token_id in stop_tokens: if token_id in stop_tokens:

View File

@ -3,7 +3,6 @@ import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
from dataclasses import dataclass, field from dataclasses import dataclass, field
import os import os
import math
import comfy.model_management import comfy.model_management
from comfy.ldm.modules.attention import optimized_attention_for_device from comfy.ldm.modules.attention import optimized_attention_for_device
@ -563,6 +562,8 @@ class Qwen35VisionModel(nn.Module):
for _ in range(config["depth"]) for _ in range(config["depth"])
]) ])
self.merger = Qwen35VisionPatchMerger(self.hidden_size, self.spatial_merge_size, config["out_hidden_size"], device=device, dtype=dtype, ops=ops) self.merger = Qwen35VisionPatchMerger(self.hidden_size, self.spatial_merge_size, config["out_hidden_size"], device=device, dtype=dtype, ops=ops)
self.deepstack_visual_indexes = [] # DeepStack, per-layer visual features (Qwen3-VL)
self.deepstack_merger_list = None
def rot_pos_emb(self, grid_thw): def rot_pos_emb(self, grid_thw):
merge_size = self.spatial_merge_size merge_size = self.spatial_merge_size
@ -664,9 +665,14 @@ class Qwen35VisionModel(nn.Module):
).cumsum(dim=0, dtype=torch.int32) ).cumsum(dim=0, dtype=torch.int32)
cu_seqlens = F.pad(cu_seqlens, (1, 0), value=0) cu_seqlens = F.pad(cu_seqlens, (1, 0), value=0)
optimized_attention = optimized_attention_for_device(x.device, mask=False, small_input=True) optimized_attention = optimized_attention_for_device(x.device, mask=False, small_input=True)
for blk in self.blocks: deepstack_features = []
for layer_num, blk in enumerate(self.blocks):
x = blk(x, cu_seqlens=cu_seqlens, position_embeddings=position_embeddings, optimized_attention=optimized_attention) x = blk(x, cu_seqlens=cu_seqlens, position_embeddings=position_embeddings, optimized_attention=optimized_attention)
if self.deepstack_merger_list is not None and layer_num in self.deepstack_visual_indexes:
deepstack_features.append(self.deepstack_merger_list[self.deepstack_visual_indexes.index(layer_num)](x))
merged = self.merger(x) merged = self.merger(x)
if self.deepstack_merger_list is not None:
return merged, deepstack_features
return merged return merged
# Model Wrapper # Model Wrapper
@ -690,30 +696,7 @@ class Qwen35(BaseLlama, BaseGenerate, torch.nn.Module):
return None, None return None, None
def forward(self, x, attention_mask=None, embeds=None, num_tokens=None, intermediate_output=None, final_layer_norm_intermediate=True, dtype=None, embeds_info=[], past_key_values=None): def forward(self, x, attention_mask=None, embeds=None, num_tokens=None, intermediate_output=None, final_layer_norm_intermediate=True, dtype=None, embeds_info=[], past_key_values=None):
grid = None position_ids = comfy.text_encoders.qwen_vl.qwen2vl_mrope_position_ids(embeds_info, embeds.shape[1], embeds.device)
position_ids = None
offset = 0
for e in embeds_info:
if e.get("type") == "image":
grid = e.get("extra", None)
start = e.get("index")
if position_ids is None:
position_ids = torch.zeros((3, embeds.shape[1]), device=embeds.device)
position_ids[:, :start] = torch.arange(0, start, device=embeds.device)
end = e.get("size") + start
len_max = int(grid.max()) // 2
start_next = len_max + start
position_ids[:, end:] = torch.arange(start_next + offset, start_next + (embeds.shape[1] - end) + offset, device=embeds.device)
position_ids[0, start:end] = start + offset
max_d = int(grid[0][1]) // 2
position_ids[1, start:end] = torch.arange(start + offset, start + max_d + offset, device=embeds.device).unsqueeze(1).repeat(1, math.ceil((end - start) / max_d)).flatten(0)[:end - start]
max_d = int(grid[0][2]) // 2
position_ids[2, start:end] = torch.arange(start + offset, start + max_d + offset, device=embeds.device).unsqueeze(0).repeat(math.ceil((end - start) / max_d), 1).flatten(0)[:end - start]
offset += len_max - (end - start)
if grid is None:
position_ids = None
return super().forward(x, attention_mask=attention_mask, embeds=embeds, num_tokens=num_tokens, intermediate_output=intermediate_output, final_layer_norm_intermediate=final_layer_norm_intermediate, dtype=dtype, position_ids=position_ids, past_key_values=past_key_values) return super().forward(x, attention_mask=attention_mask, embeds=embeds, num_tokens=num_tokens, intermediate_output=intermediate_output, final_layer_norm_intermediate=final_layer_norm_intermediate, dtype=dtype, position_ids=position_ids, past_key_values=past_key_values)
def init_kv_cache(self, batch, max_cache_len, device, execution_dtype): def init_kv_cache(self, batch, max_cache_len, device, execution_dtype):

View File

@ -0,0 +1,193 @@
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import Qwen2Tokenizer
from comfy import sd1_clip
import comfy.text_encoders.qwen_vl
from .qwen35 import Qwen35VisionModel
from .llama import BaseLlama, BaseQwen3, BaseGenerate, Llama2_, Qwen3VL_4BConfig, Qwen3VL_8BConfig
# Vision-tower settings that differ per variant, keyed by model type.
QWEN3VL_VISION = {
    "qwen3vl_4b": {
        "hidden_size": 1024,
        "intermediate_size": 4096,
        "depth": 24,
        "deepstack_visual_indexes": [5, 11, 17],
    },
    "qwen3vl_8b": {
        "hidden_size": 1152,
        "intermediate_size": 4304,
        "depth": 27,
        "deepstack_visual_indexes": [8, 16, 24],
    },
}
# Vision-tower settings shared by every variant.
QWEN3VL_VISION_COMMON = {
    "num_heads": 16,
    "patch_size": 16,
    "temporal_patch_size": 2,
    "in_channels": 3,
    "spatial_merge_size": 2,
    "num_position_embeddings": 2304,
}
# Maps each model-type string to the text-model config dataclass for that variant.
QWEN3VL_CONFIGS = {"qwen3vl_4b": Qwen3VL_4BConfig, "qwen3vl_8b": Qwen3VL_8BConfig}
class Qwen3VLDeepstackMerger(nn.Module):
    """DeepStack merger: LayerNorm over the merged width, then a two-layer GELU MLP.

    Postshuffle LayerNorm (applied after spatial merge), unlike the main merger.
    ``ops`` must provide ``LayerNorm`` and ``Linear`` constructors.
    """

    def __init__(self, hidden_size, spatial_merge_size, out_hidden_size, device=None, dtype=None, ops=None):
        super().__init__()
        placement = {"device": device, "dtype": dtype}
        merged_width = hidden_size * (spatial_merge_size ** 2)
        self.merge_dim = merged_width
        self.norm = ops.LayerNorm(merged_width, eps=1e-6, **placement)
        self.linear_fc1 = ops.Linear(merged_width, merged_width, **placement)
        self.linear_fc2 = ops.Linear(merged_width, out_hidden_size, **placement)

    def forward(self, x):
        """Regroup ``x`` into rows of ``merge_dim`` values and project to ``out_hidden_size``."""
        rows = x.view(-1, self.merge_dim)
        hidden = self.linear_fc1(self.norm(rows))
        return self.linear_fc2(F.gelu(hidden))
class Qwen3VLVisionModel(Qwen35VisionModel):
    """Qwen3.5 vision tower extended with DeepStack mergers.

    Builds one ``Qwen3VLDeepstackMerger`` per entry of
    ``config["deepstack_visual_indexes"]``.
    """

    def __init__(self, config, device=None, dtype=None, ops=None):
        super().__init__(config, device=device, dtype=dtype, ops=ops)
        tap_indexes = config["deepstack_visual_indexes"]
        self.deepstack_visual_indexes = tap_indexes
        mergers = []
        for _ in tap_indexes:
            mergers.append(
                Qwen3VLDeepstackMerger(
                    self.hidden_size,
                    self.spatial_merge_size,
                    config["out_hidden_size"],
                    device=device,
                    dtype=dtype,
                    ops=ops,
                )
            )
        self.deepstack_merger_list = nn.ModuleList(mergers)
class Qwen3VL(BaseLlama, BaseQwen3, BaseGenerate, torch.nn.Module):
    """Qwen3-VL wrapper: a ``Llama2_`` language model plus a DeepStack vision tower.

    ``model_type`` selects which entries of ``QWEN3VL_CONFIGS`` and
    ``QWEN3VL_VISION`` are used when the model is constructed.
    """

    model_type = "qwen3vl_8b"

    def __init__(self, config_dict, dtype, device, operations):
        super().__init__()
        variant = self.model_type
        config = QWEN3VL_CONFIGS[variant](**config_dict)
        self.num_layers = config.num_hidden_layers
        self.model = Llama2_(config, device=device, dtype=dtype, ops=operations)
        # Shared settings first, then per-variant overrides, then the width the
        # vision features must be projected to (the language model's hidden size).
        vision_config = dict(QWEN3VL_VISION_COMMON)
        vision_config.update(QWEN3VL_VISION[variant])
        vision_config["out_hidden_size"] = config.hidden_size
        self.visual = Qwen3VLVisionModel(vision_config, device=device, dtype=dtype, ops=operations)
        self.dtype = dtype

    def preprocess_embed(self, embed, device):
        """Run an image embed through the vision tower.

        Returns ``(merged, {"grid": ..., "deepstack": ...})`` for image embeds
        and ``(None, None)`` for any other embed type.
        """
        if embed["type"] != "image":
            return None, None
        # Qwen3-VL normalizes to [-1, 1] (mean/std 0.5), unlike Qwen2.5-VL's CLIP normalization.
        image, grid = comfy.text_encoders.qwen_vl.process_qwen2vl_images(
            embed["data"], patch_size=16, image_mean=[0.5, 0.5, 0.5], image_std=[0.5, 0.5, 0.5])
        pixels = image.to(device, dtype=torch.float32)
        merged, deepstack = self.visual(pixels, grid)
        return merged, {"grid": grid, "deepstack": deepstack}

    def build_image_inputs(self, embeds, embeds_info):
        """Return ``(position_ids, visual_pos_masks, deepstack)`` for the prompt.

        All three are ``None`` when ``embeds_info`` contains no image entries.
        ``visual_pos_masks`` is a ``(1, seq)`` boolean mask of image positions;
        ``deepstack`` holds, per vision tap, the features of all images
        concatenated along dim 0 in ascending ``index`` order.
        """
        image_entries = [info for info in embeds_info if info.get("type") == "image"]
        image_entries = sorted(image_entries, key=lambda info: info["index"])
        if not image_entries:
            return None, None, None

        device = embeds.device
        seq = embeds.shape[1]
        position_ids = comfy.text_encoders.qwen_vl.qwen2vl_mrope_position_ids(embeds_info, seq, device)

        # DeepStack: mask of image positions + per-vision-layer features to inject there.
        visual_pos_masks = torch.zeros((1, seq), dtype=torch.bool, device=device)
        per_layer = None
        for info in image_entries:
            first = info["index"]
            last = first + info["size"]
            visual_pos_masks[0, first:last] = True
            feats = info["extra"]["deepstack"]
            if per_layer is None:
                per_layer = list(feats)
            else:
                per_layer = [torch.cat([per_layer[k], feats[k]], dim=0) for k in range(len(feats))]
        return position_ids, visual_pos_masks, per_layer
def _make_qwen3vl_model(model_type):
    """Return a ``Qwen3VL`` subclass whose ``model_type`` class attribute is ``model_type``."""
    variant = model_type

    class Qwen3VL_(Qwen3VL):
        # Read through a differently named closure variable: a bare
        # `model_type = model_type` in a class body would not see the argument.
        model_type = variant

    return Qwen3VL_
class Qwen3VLClipModel(sd1_clip.SDClipModel):
    """``SDClipModel`` backed by a Qwen3-VL transformer of the requested ``model_type``."""

    def __init__(self, device="cpu", layer="hidden", layer_idx=-1, dtype=None, attention_mask=True, model_options={}, model_type="qwen3vl_8b"):
        super().__init__(
            device=device,
            layer=layer,
            layer_idx=layer_idx,
            textmodel_json_config={},
            dtype=dtype,
            special_tokens={"pad": 151643},
            layer_norm_hidden_state=False,
            model_class=_make_qwen3vl_model(model_type),
            enable_attention_masks=attention_mask,
            return_attention_masks=attention_mask,
            model_options=model_options,
        )

    def generate(self, tokens, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed, presence_penalty=0.0):
        """Generate from tokenized input, passing image-derived inputs to the transformer.

        ``tokens`` may be the tokenizer's dict output (its first value is used)
        or the token batches directly; only element 0 of each token tuple is kept.
        """
        batches = next(iter(tokens.values())) if isinstance(tokens, dict) else tokens
        ids_only = [[entry[0] for entry in batch] for batch in batches]
        embeds, _, _, embeds_info = self.process_tokens(ids_only, self.execution_device)
        position_ids, visual_pos_masks, deepstack = self.transformer.build_image_inputs(embeds, embeds_info)
        return self.transformer.generate(
            embeds, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed,
            presence_penalty=presence_penalty,
            position_ids=position_ids,
            visual_pos_masks=visual_pos_masks,
            deepstack_embeds=deepstack,
        )
class Qwen3VLTEModel(sd1_clip.SD1ClipModel):
    """``SD1ClipModel`` wrapper registering a ``Qwen3VLClipModel`` under the name ``model_type``."""

    def __init__(self, device="cpu", dtype=None, model_options={}, model_type="qwen3vl_8b"):
        def clip_model(**kw):
            # Bind the variant so the parent can construct the clip model with its usual kwargs.
            return Qwen3VLClipModel(**kw, model_type=model_type)

        super().__init__(
            device=device,
            dtype=dtype,
            name=model_type,
            clip_model=clip_model,
            model_options=model_options,
        )
class Qwen3VLSDTokenizer(sd1_clip.SDTokenizer):
    """``SDTokenizer`` configured for Qwen3-VL.

    Loads ``Qwen2Tokenizer`` files from the ``qwen25_tokenizer`` directory next
    to this module, with no start/end tokens and no padding to a maximum length.
    """

    def __init__(self, embedding_directory=None, tokenizer_data={}, embedding_size=4096, embedding_key="qwen3vl_8b"):
        module_dir = os.path.dirname(os.path.realpath(__file__))
        tokenizer_path = os.path.join(module_dir, "qwen25_tokenizer")
        super().__init__(
            tokenizer_path,
            pad_with_end=False,
            embedding_directory=embedding_directory,
            embedding_size=embedding_size,
            embedding_key=embedding_key,
            tokenizer_class=Qwen2Tokenizer,
            has_start_token=False,
            has_end_token=False,
            pad_to_max_length=False,
            max_length=99999999,
            min_length=1,
            pad_token=151643,
            tokenizer_data=tokenizer_data,
        )
# Tokenizer wrapper that formats the prompt with a chat template, tokenizes it through the
# parent class, and replaces <|image_pad|> tokens with image embed dicts.
# NOTE(review): indentation is stripped in this view, so block nesting below is inferred
# from syntax only — confirm against the real file before relying on it.
class Qwen3VLTokenizer(sd1_clip.SD1Tokenizer):
def __init__(self, embedding_directory=None, tokenizer_data={}, model_type="qwen3vl_8b"):
# Embedding width handed to the SD tokenizer: 2560 for the 4B variant, 4096 otherwise.
embedding_size = 2560 if model_type == "qwen3vl_4b" else 4096
# Bind the variant's embedding size and key onto the tokenizer constructor given to the parent.
tokenizer = lambda *a, **kw: Qwen3VLSDTokenizer(*a, **kw, embedding_size=embedding_size, embedding_key=model_type)
super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, name=model_type, tokenizer=tokenizer)
# Default chat templates; "{}" is filled with the prompt text via str.format.
self.llama_template = "<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n"
self.llama_template_images = "<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>{}<|im_end|>\n<|im_start|>assistant\n"
# text: prompt; used verbatim (no template applied) when it already starts with '<|im_start|>'.
# llama_template: optional template overriding the defaults set in __init__.
# images: image inputs, consumed in order by the <|image_pad|> tokens found in the result.
# prevent_empty_text: replace an empty prompt with a single space.
# thinking: when False, an empty <think> block is appended to the formatted text.
# Returns the token dict produced by the parent tokenizer, modified in place.
def tokenize_with_weights(self, text, return_word_ids=False, llama_template=None, images=[], prevent_empty_text=False, thinking=False, **kwargs):
# A batched `image` kwarg is used only when `images` is empty; it is split along
# its first axis into slices of length 1.
image = kwargs.get("image", None)
if image is not None and len(images) == 0:
images = [image[i:i + 1] for i in range(image.shape[0])]
skip_template = text.startswith('<|im_start|>')
if prevent_empty_text and text == '':
text = ' '
if skip_template:
llama_text = text
else:
if llama_template is not None:
template = llama_template
elif len(images) == 0:
template = self.llama_template
else:
template = self.llama_template_images
# With several images, repeat the vision block once per image (first occurrence only).
if len(images) > 1:
vision_block = "<|vision_start|><|image_pad|><|vision_end|>"
template = template.replace(vision_block, vision_block * len(images), 1)
llama_text = template.format(text)
# NOTE(review): cannot tell from this view whether the block below also applies
# to the skip_template path — confirm.
if not thinking: # Qwen3 convention: empty think block suppresses reasoning
llama_text += "<think>\n\n</think>\n\n"
tokens = super().tokenize_with_weights(llama_text, return_word_ids=return_word_ids, disable_weights=True, **kwargs)
key_name = next(iter(tokens))
# Swap each image-pad token id for an embed dict, keeping the rest of the token tuple;
# pad tokens beyond the number of supplied images are left as they are.
embed_count = 0
for r in tokens[key_name]:
for i in range(len(r)):
if r[i][0] == 151655: # <|image_pad|>
if len(images) > embed_count:
r[i] = ({"type": "image", "data": images[embed_count], "original_type": "image"},) + r[i][1:]
embed_count += 1
return tokens
def tokenizer(model_type="qwen3vl_8b"):
    """Return a ``Qwen3VLTokenizer`` subclass with ``model_type`` pre-bound.

    The returned class takes only ``embedding_directory`` and ``tokenizer_data``.
    """
    variant = model_type

    class Qwen3VLTokenizer_(Qwen3VLTokenizer):
        def __init__(self, embedding_directory=None, tokenizer_data={}):
            super().__init__(
                embedding_directory=embedding_directory,
                tokenizer_data=tokenizer_data,
                model_type=variant,
            )

    return Qwen3VLTokenizer_
def te(dtype_llama=None, llama_quantization_metadata=None, model_type="qwen3vl_8b"):
    """Build a ``Qwen3VLTEModel`` subclass with load-time overrides baked in.

    dtype_llama: when not None, replaces the ``dtype`` given to the constructor.
    llama_quantization_metadata: when not None, stored under
        ``model_options["quantization_metadata"]`` on a copy of the options.
    model_type: variant forwarded to ``Qwen3VLTEModel``.
    Returns the class itself, not an instance.
    """
    class Qwen3VLTEModel_(Qwen3VLTEModel):
        def __init__(self, device="cpu", dtype=None, model_options={}):
            chosen_dtype = dtype if dtype_llama is None else dtype_llama
            chosen_options = model_options
            if llama_quantization_metadata is not None:
                # Work on a copy so the caller's dict (or the shared default) is never mutated.
                chosen_options = {**model_options, "quantization_metadata": llama_quantization_metadata}
            super().__init__(device=device, dtype=chosen_dtype, model_options=chosen_options, model_type=model_type)

    return Qwen3VLTEModel_

View File

@ -88,6 +88,32 @@ def process_qwen2vl_images(
return flatten_patches, image_grid_thw return flatten_patches, image_grid_thw
def qwen2vl_mrope_position_ids(embeds_info, seq_len, device):
    """Build (3, seq_len) T/H/W MRoPE position ids for a prompt with image embeds.

    Text runs sequentially; each image span gets positions derived from its grid.
    Each image entry's ``extra`` is the image grid_thw, or a dict carrying it
    under ``"grid"``. Grid sizes are integer-divided by 2 before use
    (presumably the spatial merge size — confirm).

    Returns None when ``embeds_info`` contains no image entries.
    """
    pos = None
    shift = 0  # running difference between positions consumed and tokens occupied by earlier images
    for entry in embeds_info:
        if entry.get("type") != "image":
            continue
        payload = entry.get("extra", None)
        grid = payload["grid"] if isinstance(payload, dict) else payload
        first = entry.get("index")
        if pos is None:
            # Everything before the first image is plain sequential text.
            pos = torch.zeros((3, seq_len), device=device)
            pos[:, :first] = torch.arange(0, first, device=device)
        last = entry.get("size") + first
        span = last - first
        extent = int(grid.max()) // 2
        base = first + shift
        resume = first + extent + shift

        # Text after the image continues from the image's largest extent.
        pos[:, last:] = torch.arange(resume, resume + (seq_len - last), device=device)
        # T axis: constant over the whole image span.
        pos[0, first:last] = base
        # H axis: each row index is held for a full run before advancing.
        rows = int(grid[0][1]) // 2
        pos[1, first:last] = torch.arange(base, base + rows, device=device).repeat_interleave(math.ceil(span / rows))[:span]
        # W axis: the column indices cycle repeatedly across the span.
        cols = int(grid[0][2]) // 2
        pos[2, first:last] = torch.arange(base, base + cols, device=device).repeat(math.ceil(span / cols))[:span]

        shift += extent - span
    return pos
class VisionPatchEmbed(nn.Module): class VisionPatchEmbed(nn.Module):
def __init__( def __init__(
self, self,

View File

@ -325,21 +325,25 @@ class VideoFromFile(VideoInput):
checked_alpha = True checked_alpha = True
# Fix non-deterministic video decode when the video width is not a multiple of 32 # Fix non-deterministic video decode when the video width is not a multiple of 32
# For non-yuvj pixel formats (all H.264/H.265 video) # For non-yuvj pixel formats: most H.264/H.265 video and static images (e.g. lossy WebP via LoadImage)
# Pad both axes to a multiple of 32 and smear the border so the alignment padding never bleeds into the cropped edges
if image_format in ('gbrpf32le', 'gbrapf32le') and frame.width % 32 != 0: if image_format in ('gbrpf32le', 'gbrapf32le') and frame.width % 32 != 0:
if align_graph is None: if align_graph is None:
pad_w = ((frame.width + 31) // 32) * 32 pad_w = ((frame.width + 31) // 32) * 32
pad_h = ((frame.height + 31) // 32) * 32
g = av.filter.Graph() g = av.filter.Graph()
g_src = g.add_buffer(width=frame.width, height=frame.height, g_src = g.add_buffer(width=frame.width, height=frame.height,
format=frame.format.name, time_base=video_stream.time_base) format=frame.format.name, time_base=video_stream.time_base)
g_pad = g.add('pad', f'{pad_w}:{frame.height}:0:0') g_pad = g.add('pad', f'{pad_w}:{pad_h}:0:0')
g_fill = g.add('fillborders', f'left=0:right={pad_w - frame.width}:top=0:bottom={pad_h - frame.height}:mode=smear')
g_sink = g.add('buffersink') g_sink = g.add('buffersink')
g_src.link_to(g_pad) g_src.link_to(g_pad)
g_pad.link_to(g_sink) g_pad.link_to(g_fill)
g_fill.link_to(g_sink)
g.configure() g.configure()
align_graph = (g, g_src, g_sink) align_graph = (g, g_src, g_sink)
align_graph[1].push(frame) align_graph[1].push(frame)
img = np.ascontiguousarray(align_graph[2].pull().to_ndarray(format=image_format)[:, :frame.width]) img = np.ascontiguousarray(align_graph[2].pull().to_ndarray(format=image_format)[:frame.height, :frame.width])
else: else:
img = frame.to_ndarray(format=image_format) img = frame.to_ndarray(format=image_format)
if frame.rotation != 0: if frame.rotation != 0:

View File

@ -100,8 +100,7 @@ class SoniloTextToMusic(IO.ComfyNode):
node_id="SoniloTextToMusic", node_id="SoniloTextToMusic",
display_name="Sonilo Text to Music", display_name="Sonilo Text to Music",
category="partner/audio/Sonilo", category="partner/audio/Sonilo",
description="Generate music from a text prompt using Sonilo's AI model. " description="Generate music from a text prompt using Sonilo's AI model.",
"Leave duration at 0 to let the model infer it from the prompt.",
inputs=[ inputs=[
IO.String.Input( IO.String.Input(
"prompt", "prompt",
@ -135,13 +134,7 @@ class SoniloTextToMusic(IO.ComfyNode):
is_api_node=True, is_api_node=True,
price_badge=IO.PriceBadge( price_badge=IO.PriceBadge(
depends_on=IO.PriceBadgeDepends(widgets=["duration"]), depends_on=IO.PriceBadgeDepends(widgets=["duration"]),
expr=""" expr='{"type":"usd","usd": 0.0025 * widgets.duration}',
(
widgets.duration > 0
? {"type":"usd","usd": 0.005 * widgets.duration}
: {"type":"usd","usd": 0.005, "format":{"suffix":"/second"}}
)
""",
), ),
) )

View File

@ -11,7 +11,7 @@ class TextEncodeAceStepAudio(IO.ComfyNode):
def define_schema(cls): def define_schema(cls):
return IO.Schema( return IO.Schema(
node_id="TextEncodeAceStepAudio", node_id="TextEncodeAceStepAudio",
category="model/conditioning", category="model/conditioning/ace",
inputs=[ inputs=[
IO.Clip.Input("clip"), IO.Clip.Input("clip"),
IO.String.Input("tags", multiline=True, dynamic_prompts=True), IO.String.Input("tags", multiline=True, dynamic_prompts=True),
@ -33,7 +33,7 @@ class TextEncodeAceStepAudio15(IO.ComfyNode):
def define_schema(cls): def define_schema(cls):
return IO.Schema( return IO.Schema(
node_id="TextEncodeAceStepAudio1.5", node_id="TextEncodeAceStepAudio1.5",
category="model/conditioning", category="model/conditioning/ace",
inputs=[ inputs=[
IO.Clip.Input("clip"), IO.Clip.Input("clip"),
IO.String.Input("tags", multiline=True, dynamic_prompts=True), IO.String.Input("tags", multiline=True, dynamic_prompts=True),
@ -67,7 +67,7 @@ class EmptyAceStepLatentAudio(IO.ComfyNode):
return IO.Schema( return IO.Schema(
node_id="EmptyAceStepLatentAudio", node_id="EmptyAceStepLatentAudio",
display_name="Empty Ace Step 1.0 Latent Audio", display_name="Empty Ace Step 1.0 Latent Audio",
category="model/latent/audio", category="model/latent/ace",
inputs=[ inputs=[
IO.Float.Input("seconds", default=120.0, min=1.0, max=1000.0, step=0.1), IO.Float.Input("seconds", default=120.0, min=1.0, max=1000.0, step=0.1),
IO.Int.Input( IO.Int.Input(
@ -90,7 +90,7 @@ class EmptyAceStep15LatentAudio(IO.ComfyNode):
return IO.Schema( return IO.Schema(
node_id="EmptyAceStep1.5LatentAudio", node_id="EmptyAceStep1.5LatentAudio",
display_name="Empty Ace Step 1.5 Latent Audio", display_name="Empty Ace Step 1.5 Latent Audio",
category="model/latent/audio", category="model/latent/ace",
inputs=[ inputs=[
IO.Float.Input("seconds", default=120.0, min=1.0, max=1000.0, step=0.01), IO.Float.Input("seconds", default=120.0, min=1.0, max=1000.0, step=0.01),
IO.Int.Input( IO.Int.Input(
@ -111,8 +111,8 @@ class ReferenceAudio(IO.ComfyNode):
def define_schema(cls): def define_schema(cls):
return IO.Schema( return IO.Schema(
node_id="ReferenceTimbreAudio", node_id="ReferenceTimbreAudio",
display_name="Reference Audio", display_name="Set Reference Audio",
category="advanced/conditioning/audio", category="model/conditioning",
is_experimental=True, is_experimental=True,
description="This node sets the reference audio for ace step 1.5", description="This node sets the reference audio for ace step 1.5",
inputs=[ inputs=[

View File

@ -16,7 +16,7 @@ class APG(io.ComfyNode):
return io.Schema( return io.Schema(
node_id="APG", node_id="APG",
display_name="Adaptive Projected Guidance", display_name="Adaptive Projected Guidance",
category="model/sampling/custom_sampling", category="model/sampling/custom",
inputs=[ inputs=[
io.Model.Input("model"), io.Model.Input("model"),
io.Float.Input( io.Float.Input(

View File

@ -19,7 +19,7 @@ class EmptyARVideoLatent(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="EmptyARVideoLatent", node_id="EmptyARVideoLatent",
category="model/latent/video", category="model/latent/autoregressive",
inputs=[ inputs=[
io.Int.Input("width", default=832, min=16, max=8192, step=16), io.Int.Input("width", default=832, min=16, max=8192, step=16),
io.Int.Input("height", default=480, min=16, max=8192, step=16), io.Int.Input("height", default=480, min=16, max=8192, step=16),
@ -85,7 +85,7 @@ class ARVideoI2V(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="ARVideoI2V", node_id="ARVideoI2V",
category="model/conditioning/video_models", category="model/conditioning/autoregressive",
inputs=[ inputs=[
io.Model.Input("model"), io.Model.Input("model"),
io.Vae.Input("vae"), io.Vae.Input("vae"),

View File

@ -16,7 +16,7 @@ class EmptyLatentAudio(IO.ComfyNode):
return IO.Schema( return IO.Schema(
node_id="EmptyLatentAudio", node_id="EmptyLatentAudio",
display_name="Empty Latent Audio", display_name="Empty Latent Audio",
category="model/latent/audio", category="model/latent",
essentials_category="Audio", essentials_category="Audio",
inputs=[ inputs=[
IO.Float.Input("seconds", default=47.6, min=1.0, max=1000.0, step=0.1), IO.Float.Input("seconds", default=47.6, min=1.0, max=1000.0, step=0.1),
@ -41,7 +41,7 @@ class ConditioningStableAudio(IO.ComfyNode):
def define_schema(cls): def define_schema(cls):
return IO.Schema( return IO.Schema(
node_id="ConditioningStableAudio", node_id="ConditioningStableAudio",
category="model/conditioning", category="model/conditioning/stable audio",
inputs=[ inputs=[
IO.Conditioning.Input("positive"), IO.Conditioning.Input("positive"),
IO.Conditioning.Input("negative"), IO.Conditioning.Input("negative"),
@ -70,7 +70,7 @@ class VAEEncodeAudio(IO.ComfyNode):
node_id="VAEEncodeAudio", node_id="VAEEncodeAudio",
search_aliases=["audio to latent"], search_aliases=["audio to latent"],
display_name="VAE Encode Audio", display_name="VAE Encode Audio",
category="model/latent/audio", category="model/latent",
inputs=[ inputs=[
IO.Audio.Input("audio"), IO.Audio.Input("audio"),
IO.Vae.Input("vae"), IO.Vae.Input("vae"),
@ -115,7 +115,7 @@ class VAEDecodeAudio(IO.ComfyNode):
node_id="VAEDecodeAudio", node_id="VAEDecodeAudio",
search_aliases=["latent to audio"], search_aliases=["latent to audio"],
display_name="VAE Decode Audio", display_name="VAE Decode Audio",
category="model/latent/audio", category="model/latent",
inputs=[ inputs=[
IO.Latent.Input("samples"), IO.Latent.Input("samples"),
IO.Vae.Input("vae"), IO.Vae.Input("vae"),
@ -137,7 +137,7 @@ class VAEDecodeAudioTiled(IO.ComfyNode):
node_id="VAEDecodeAudioTiled", node_id="VAEDecodeAudioTiled",
search_aliases=["latent to audio"], search_aliases=["latent to audio"],
display_name="VAE Decode Audio (Tiled)", display_name="VAE Decode Audio (Tiled)",
category="model/latent/audio", category="model/latent",
inputs=[ inputs=[
IO.Latent.Input("samples"), IO.Latent.Input("samples"),
IO.Vae.Input("vae"), IO.Vae.Input("vae"),

View File

@ -39,9 +39,9 @@ class BerniniConditioning(io.ComfyNode):
return io.Schema( return io.Schema(
node_id="BerniniConditioning", node_id="BerniniConditioning",
display_name="Bernini Conditioning", display_name="Bernini Conditioning",
category="conditioning/video_models", category="model/conditioning/bernini",
description="Conditioning node for Bernini in-context video/image conditioning. It can be used for the following tasks: t2v (text-to-video), v2v (video-to-video), rv2v (reference-guided video editing), r2v (reference-to-video), ads2v (insert image/video into video). " description="Conditioning node for Bernini in-context video/image conditioning. It can be used for the following tasks: t2v (text-to-video), v2v (video-to-video), rv2v (reference-guided video editing), r2v (reference-to-video), ads2v (insert image/video into video). "
"Reference images injected as in-context tokens (r2v, rv2v) are encoded independently at their own native aspect ratio (long edge capped at ref_max_size)", "Reference images injected as in-context tokens (r2v, rv2v) are encoded independently at their own native aspect ratio (long edge capped at ref_max_size)",
inputs=[ inputs=[
io.Conditioning.Input("positive"), io.Conditioning.Input("positive"),
io.Conditioning.Input("negative"), io.Conditioning.Input("negative"),
@ -50,14 +50,11 @@ class BerniniConditioning(io.ComfyNode):
io.Int.Input("height", default=480, min=16, max=8192, step=16), io.Int.Input("height", default=480, min=16, max=8192, step=16),
io.Int.Input("length", default=81, min=1, max=8192, step=4), io.Int.Input("length", default=81, min=1, max=8192, step=4),
io.Int.Input("batch_size", default=1, min=1, max=4096), io.Int.Input("batch_size", default=1, min=1, max=4096),
io.Image.Input("source_video", optional=True, tooltip=( io.Image.Input("source_video", optional=True, tooltip=("Source video to edit or restyle (v2v, rv2v). Resized to width/height and trimmed to length.")),
"Source video to edit or restyle (v2v, rv2v). Resized to width/height and trimmed to length.")), io.Image.Input("reference_video", optional=True, tooltip=("Video to insert into the source video (ads2v).")),
io.Image.Input("reference_video", optional=True, tooltip=(
"Video to insert into the source video (ads2v).")),
io.Autogrow.Input("reference_images", optional=True, io.Autogrow.Input("reference_images", optional=True,
template=io.Autogrow.TemplatePrefix( template=io.Autogrow.TemplatePrefix(
input=io.Image.Input("reference_image", tooltip=( input=io.Image.Input("reference_image", tooltip=("Reference image injected as an in-context token (r2v, rv2v).")),
"Reference image injected as an in-context token (r2v, rv2v).")),
prefix="reference_image_", min=0, max=8)), prefix="reference_image_", min=0, max=8)),
io.Int.Input("ref_max_size", default=848, min=16, max=8192, step=16, optional=True, tooltip=( io.Int.Input("ref_max_size", default=848, min=16, max=8192, step=16, optional=True, tooltip=(
"Max size for the long edge of reference_video and reference_images. Resized with preserved aspect ratio and snapped to 16px.")), "Max size for the long edge of reference_video and reference_images. Resized with preserved aspect ratio and snapped to 16px.")),
@ -70,10 +67,8 @@ class BerniniConditioning(io.ComfyNode):
) )
@classmethod @classmethod
def execute(cls, positive, negative, vae, width, height, length, batch_size, def execute(cls, positive, negative, vae, width, height, length, batch_size, source_video=None, reference_video=None, reference_images=None, ref_max_size=848) -> io.NodeOutput:
source_video=None, reference_video=None, reference_images=None, ref_max_size=848) -> io.NodeOutput: latent = torch.zeros([batch_size, 16, ((length - 1) // 4) + 1, height // 8, width // 8], device=comfy.model_management.intermediate_device())
latent = torch.zeros([batch_size, 16, ((length - 1) // 4) + 1, height // 8, width // 8],
device=comfy.model_management.intermediate_device())
# source_video (1), reference_video (2), reference_images (3, 4, ...). # source_video (1), reference_video (2), reference_images (3, 4, ...).
context = [] context = []
@ -106,9 +101,7 @@ class BerniniConditioning(io.ComfyNode):
class BerniniExtension(ComfyExtension): class BerniniExtension(ComfyExtension):
@override @override
async def get_node_list(self) -> list[type[io.ComfyNode]]: async def get_node_list(self) -> list[type[io.ComfyNode]]:
return [ return [BerniniConditioning,]
BerniniConditioning,
]
async def comfy_entrypoint() -> BerniniExtension: async def comfy_entrypoint() -> BerniniExtension:

View File

@ -153,7 +153,7 @@ class WanCameraEmbedding(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="WanCameraEmbedding", node_id="WanCameraEmbedding",
category="model/conditioning/video_models", category="model/conditioning/wan/camera",
inputs=[ inputs=[
io.Combo.Input( io.Combo.Input(
"camera_pose", "camera_pose",

View File

@ -13,7 +13,7 @@ class EmptyChromaRadianceLatentImage(io.ComfyNode):
def define_schema(cls) -> io.Schema: def define_schema(cls) -> io.Schema:
return io.Schema( return io.Schema(
node_id="EmptyChromaRadianceLatentImage", node_id="EmptyChromaRadianceLatentImage",
category="model/latent/chroma_radiance", category="model/latent/chroma radiance",
inputs=[ inputs=[
io.Int.Input(id="width", default=1024, min=16, max=nodes.MAX_RESOLUTION, step=16), io.Int.Input(id="width", default=1024, min=16, max=nodes.MAX_RESOLUTION, step=16),
io.Int.Input(id="height", default=1024, min=16, max=nodes.MAX_RESOLUTION, step=16), io.Int.Input(id="height", default=1024, min=16, max=nodes.MAX_RESOLUTION, step=16),
@ -33,7 +33,7 @@ class ChromaRadianceOptions(io.ComfyNode):
def define_schema(cls) -> io.Schema: def define_schema(cls) -> io.Schema:
return io.Schema( return io.Schema(
node_id="ChromaRadianceOptions", node_id="ChromaRadianceOptions",
category="model/patch/chroma_radiance", category="model/patch/chroma radiance",
description="Allows setting advanced options for the Chroma Radiance model.", description="Allows setting advanced options for the Chroma Radiance model.",
inputs=[ inputs=[
io.Model.Input(id="model"), io.Model.Input(id="model"),

View File

@ -9,7 +9,8 @@ class CLIPTextEncodeSDXLRefiner(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="CLIPTextEncodeSDXLRefiner", node_id="CLIPTextEncodeSDXLRefiner",
category="advanced/conditioning", display_name="CLIP Text Encode (SDXL Refiner)",
category="model/conditioning/stable diffusion",
inputs=[ inputs=[
io.Float.Input("ascore", default=6.0, min=0.0, max=1000.0, step=0.01), io.Float.Input("ascore", default=6.0, min=0.0, max=1000.0, step=0.01),
io.Int.Input("width", default=1024, min=0, max=nodes.MAX_RESOLUTION), io.Int.Input("width", default=1024, min=0, max=nodes.MAX_RESOLUTION),
@ -30,7 +31,8 @@ class CLIPTextEncodeSDXL(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="CLIPTextEncodeSDXL", node_id="CLIPTextEncodeSDXL",
category="advanced/conditioning", display_name="CLIP Text Encode (SDXL)",
category="model/conditioning/stable diffusion",
inputs=[ inputs=[
io.Clip.Input("clip"), io.Clip.Input("clip"),
io.Int.Input("width", default=1024, min=0, max=nodes.MAX_RESOLUTION), io.Int.Input("width", default=1024, min=0, max=nodes.MAX_RESOLUTION),

View File

@ -66,6 +66,7 @@ class WanContextWindowsManualNode(ContextWindowsManualNode):
schema.node_id = "WanContextWindowsManual" schema.node_id = "WanContextWindowsManual"
schema.display_name = "WAN Context Windows (Manual)" schema.display_name = "WAN Context Windows (Manual)"
schema.description = "Manually set context windows for WAN-like models (dim=2)." schema.description = "Manually set context windows for WAN-like models (dim=2)."
schema.category = "model/patch/wan"
schema.inputs = [ schema.inputs = [
io.Model.Input("model", tooltip="The model to apply context windows to during sampling."), io.Model.Input("model", tooltip="The model to apply context windows to during sampling."),
io.Int.Input("context_length", min=1, max=nodes.MAX_RESOLUTION, step=4, default=81, tooltip="The length of the context window.", advanced=True), io.Int.Input("context_length", min=1, max=nodes.MAX_RESOLUTION, step=4, default=81, tooltip="The length of the context window.", advanced=True),

View File

@ -9,6 +9,8 @@ class SetUnionControlNetType(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="SetUnionControlNetType", node_id="SetUnionControlNetType",
search_aliases=["set controlnet type", "union controlnet type"],
display_name="Set Union ControlNet Type",
category="model/conditioning/controlnet", category="model/conditioning/controlnet",
inputs=[ inputs=[
io.ControlNet.Input("control_net"), io.ControlNet.Input("control_net"),
@ -39,6 +41,7 @@ class ControlNetInpaintingAliMamaApply(io.ComfyNode):
return io.Schema( return io.Schema(
node_id="ControlNetInpaintingAliMamaApply", node_id="ControlNetInpaintingAliMamaApply",
search_aliases=["masked controlnet"], search_aliases=["masked controlnet"],
display_name="Apply ControlNet Inpainting (AliMama)",
category="model/conditioning/controlnet", category="model/conditioning/controlnet",
inputs=[ inputs=[
io.Conditioning.Input("positive"), io.Conditioning.Input("positive"),

View File

@ -13,7 +13,7 @@ class EmptyCosmosLatentVideo(io.ComfyNode):
def define_schema(cls) -> io.Schema: def define_schema(cls) -> io.Schema:
return io.Schema( return io.Schema(
node_id="EmptyCosmosLatentVideo", node_id="EmptyCosmosLatentVideo",
category="model/latent/video", category="model/latent/cosmos",
inputs=[ inputs=[
io.Int.Input("width", default=1280, min=16, max=nodes.MAX_RESOLUTION, step=16), io.Int.Input("width", default=1280, min=16, max=nodes.MAX_RESOLUTION, step=16),
io.Int.Input("height", default=704, min=16, max=nodes.MAX_RESOLUTION, step=16), io.Int.Input("height", default=704, min=16, max=nodes.MAX_RESOLUTION, step=16),
@ -45,7 +45,7 @@ class CosmosImageToVideoLatent(io.ComfyNode):
def define_schema(cls) -> io.Schema: def define_schema(cls) -> io.Schema:
return io.Schema( return io.Schema(
node_id="CosmosImageToVideoLatent", node_id="CosmosImageToVideoLatent",
category="model/conditioning/inpaint", category="model/conditioning/cosmos",
inputs=[ inputs=[
io.Vae.Input("vae"), io.Vae.Input("vae"),
io.Int.Input("width", default=1280, min=16, max=nodes.MAX_RESOLUTION, step=16), io.Int.Input("width", default=1280, min=16, max=nodes.MAX_RESOLUTION, step=16),
@ -88,7 +88,7 @@ class CosmosPredict2ImageToVideoLatent(io.ComfyNode):
def define_schema(cls) -> io.Schema: def define_schema(cls) -> io.Schema:
return io.Schema( return io.Schema(
node_id="CosmosPredict2ImageToVideoLatent", node_id="CosmosPredict2ImageToVideoLatent",
category="model/conditioning/inpaint", category="model/conditioning/cosmos",
inputs=[ inputs=[
io.Vae.Input("vae"), io.Vae.Input("vae"),
io.Int.Input("width", default=848, min=16, max=nodes.MAX_RESOLUTION, step=16), io.Int.Input("width", default=848, min=16, max=nodes.MAX_RESOLUTION, step=16),

View File

@ -729,7 +729,7 @@ class SamplerCustom(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="SamplerCustom", node_id="SamplerCustom",
category="model/sampling/custom_sampling", category="model/sampling/custom",
inputs=[ inputs=[
io.Model.Input("model"), io.Model.Input("model"),
io.Boolean.Input("add_noise", default=True, advanced=True), io.Boolean.Input("add_noise", default=True, advanced=True),
@ -1015,7 +1015,7 @@ class SamplerCustomAdvanced(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="SamplerCustomAdvanced", node_id="SamplerCustomAdvanced",
category="model/sampling/custom_sampling", category="model/sampling/custom",
inputs=[ inputs=[
io.Noise.Input("noise"), io.Noise.Input("noise"),
io.Guider.Input("guider"), io.Guider.Input("guider"),
@ -1143,7 +1143,7 @@ class CFGOverride(io.ComfyNode):
display_name="CFG Override", display_name="CFG Override",
description="Override cfg to a fixed value over a [start, end] percent (sigma) range. " description="Override cfg to a fixed value over a [start, end] percent (sigma) range. "
"With multiple overrides, the one nearest the sampler wins on overlap.", "With multiple overrides, the one nearest the sampler wins on overlap.",
category="sampling/custom_sampling", category="model/sampling/guiders",
inputs=[ inputs=[
io.Model.Input("model"), io.Model.Input("model"),
io.Float.Input("cfg", default=1.0, min=0.0, max=100.0, step=0.1, round=0.01), io.Float.Input("cfg", default=1.0, min=0.0, max=100.0, step=0.1, round=0.01),

View File

@ -363,7 +363,7 @@ class EasyCacheNode(io.ComfyNode):
node_id="EasyCache", node_id="EasyCache",
display_name="EasyCache", display_name="EasyCache",
description="Native EasyCache implementation.", description="Native EasyCache implementation.",
category="advanced/debug/model", category="advanced/debug",
is_experimental=True, is_experimental=True,
inputs=[ inputs=[
io.Model.Input("model", tooltip="The model to add EasyCache to."), io.Model.Input("model", tooltip="The model to add EasyCache to."),
@ -496,7 +496,7 @@ class LazyCacheNode(io.ComfyNode):
node_id="LazyCache", node_id="LazyCache",
display_name="LazyCache", display_name="LazyCache",
description="A homebrew version of EasyCache - even 'easier' version of EasyCache to implement. Overall works worse than EasyCache, but better in some rare cases AND universal compatibility with everything in ComfyUI.", description="A homebrew version of EasyCache - even 'easier' version of EasyCache to implement. Overall works worse than EasyCache, but better in some rare cases AND universal compatibility with everything in ComfyUI.",
category="advanced/debug/model", category="advanced/debug",
is_experimental=True, is_experimental=True,
inputs=[ inputs=[
io.Model.Input("model", tooltip="The model to add LazyCache to."), io.Model.Input("model", tooltip="The model to add LazyCache to."),

View File

@ -8,7 +8,8 @@ class ReferenceLatent(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="ReferenceLatent", node_id="ReferenceLatent",
category="advanced/conditioning/edit_models", display_name="Set Reference Latent",
category="model/conditioning",
description="This node sets the guiding latent for an edit model. If the model supports it you can chain multiple to set multiple reference images.", description="This node sets the guiding latent for an edit model. If the model supports it you can chain multiple to set multiple reference images.",
inputs=[ inputs=[
io.Conditioning.Input("conditioning"), io.Conditioning.Input("conditioning"),

View File

@ -13,7 +13,7 @@ class CLIPTextEncodeFlux(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="CLIPTextEncodeFlux", node_id="CLIPTextEncodeFlux",
category="advanced/conditioning/flux", category="model/conditioning/flux",
inputs=[ inputs=[
io.Clip.Input("clip"), io.Clip.Input("clip"),
io.String.Input("clip_l", multiline=True, dynamic_prompts=True), io.String.Input("clip_l", multiline=True, dynamic_prompts=True),
@ -40,7 +40,7 @@ class EmptyFlux2LatentImage(io.ComfyNode):
return io.Schema( return io.Schema(
node_id="EmptyFlux2LatentImage", node_id="EmptyFlux2LatentImage",
display_name="Empty Flux 2 Latent", display_name="Empty Flux 2 Latent",
category="model/latent", category="model/latent/flux",
inputs=[ inputs=[
io.Int.Input("width", default=1024, min=16, max=nodes.MAX_RESOLUTION, step=16), io.Int.Input("width", default=1024, min=16, max=nodes.MAX_RESOLUTION, step=16),
io.Int.Input("height", default=1024, min=16, max=nodes.MAX_RESOLUTION, step=16), io.Int.Input("height", default=1024, min=16, max=nodes.MAX_RESOLUTION, step=16),
@ -61,7 +61,7 @@ class FluxGuidance(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="FluxGuidance", node_id="FluxGuidance",
category="advanced/conditioning/flux", category="model/conditioning/flux",
inputs=[ inputs=[
io.Conditioning.Input("conditioning"), io.Conditioning.Input("conditioning"),
io.Float.Input("guidance", default=3.5, min=0.0, max=100.0, step=0.1), io.Float.Input("guidance", default=3.5, min=0.0, max=100.0, step=0.1),
@ -84,7 +84,7 @@ class FluxDisableGuidance(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="FluxDisableGuidance", node_id="FluxDisableGuidance",
category="advanced/conditioning/flux", category="model/conditioning/flux",
description="This node completely disables the guidance embed on Flux and Flux like models", description="This node completely disables the guidance embed on Flux and Flux like models",
inputs=[ inputs=[
io.Conditioning.Input("conditioning"), io.Conditioning.Input("conditioning"),
@ -128,7 +128,7 @@ class FluxKontextImageScale(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="FluxKontextImageScale", node_id="FluxKontextImageScale",
category="advanced/conditioning/flux", category="model/conditioning/flux",
description="This node resizes the image to one that is more optimal for flux kontext.", description="This node resizes the image to one that is more optimal for flux kontext.",
inputs=[ inputs=[
io.Image.Input("image"), io.Image.Input("image"),
@ -156,7 +156,7 @@ class FluxKontextMultiReferenceLatentMethod(io.ComfyNode):
return io.Schema( return io.Schema(
node_id="FluxKontextMultiReferenceLatentMethod", node_id="FluxKontextMultiReferenceLatentMethod",
display_name="Edit Model Reference Method", display_name="Edit Model Reference Method",
category="advanced/conditioning/flux", category="model/conditioning/flux",
inputs=[ inputs=[
io.Conditioning.Input("conditioning"), io.Conditioning.Input("conditioning"),
io.Combo.Input( io.Combo.Input(

View File

@ -11,8 +11,9 @@ class QuadrupleCLIPLoader(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="QuadrupleCLIPLoader", node_id="QuadrupleCLIPLoader",
category="advanced/loaders", display_name="Load CLIP (Quadruple)",
description="[Recipes]\n\nhidream: long clip-l, long clip-g, t5xxl, llama_8b_3.1_instruct", category="model/loaders",
description="Recipes:\nhidream: long clip-l, long clip-g, t5xxl, llama_8b_3.1_instruct",
inputs=[ inputs=[
io.Combo.Input("clip_name1", options=folder_paths.get_filename_list("text_encoders")), io.Combo.Input("clip_name1", options=folder_paths.get_filename_list("text_encoders")),
io.Combo.Input("clip_name2", options=folder_paths.get_filename_list("text_encoders")), io.Combo.Input("clip_name2", options=folder_paths.get_filename_list("text_encoders")),
@ -38,8 +39,9 @@ class CLIPTextEncodeHiDream(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="CLIPTextEncodeHiDream", node_id="CLIPTextEncodeHiDream",
display_name="CLIP Text Encode (HiDream)",
search_aliases=["hidream prompt"], search_aliases=["hidream prompt"],
category="advanced/conditioning", category="model/conditioning/hidream",
inputs=[ inputs=[
io.Clip.Input("clip"), io.Clip.Input("clip"),
io.String.Input("clip_l", multiline=True, dynamic_prompts=True), io.String.Input("clip_l", multiline=True, dynamic_prompts=True),

View File

@ -14,7 +14,7 @@ class EmptyHiDreamO1LatentImage(io.ComfyNode):
return io.Schema( return io.Schema(
node_id="EmptyHiDreamO1LatentImage", node_id="EmptyHiDreamO1LatentImage",
display_name="Empty HiDream-O1 Latent Image", display_name="Empty HiDream-O1 Latent Image",
category="model/latent/image", category="model/latent/hidream",
description=( description=(
"Empty pixel-space latent for HiDream-O1-Image. The model was " "Empty pixel-space latent for HiDream-O1-Image. The model was "
"trained at ~4 megapixels; lower resolutions go off-distribution " "trained at ~4 megapixels; lower resolutions go off-distribution "
@ -47,7 +47,7 @@ class HiDreamO1ReferenceImages(io.ComfyNode):
return io.Schema( return io.Schema(
node_id="HiDreamO1ReferenceImages", node_id="HiDreamO1ReferenceImages",
display_name="HiDream-O1 Reference Images", display_name="HiDream-O1 Reference Images",
category="model/conditioning/image", category="model/conditioning/hidream",
description=( description=(
"Attach 1-10 reference images to conditioning, one for edit instruction " "Attach 1-10 reference images to conditioning, one for edit instruction "
"or multiple for subject-driven personalization." "or multiple for subject-driven personalization."
@ -117,7 +117,7 @@ class HiDreamO1PatchSeamSmoothing(io.ComfyNode):
return io.Schema( return io.Schema(
node_id="HiDreamO1PatchSeamSmoothing", node_id="HiDreamO1PatchSeamSmoothing",
display_name="HiDream-O1 Patch Seam Smoothing", display_name="HiDream-O1 Patch Seam Smoothing",
category="advanced/model", category="model/patch/hidream",
is_experimental=True, is_experimental=True,
description=( description=(
"Average the model output across multiple shifted patch-grid " "Average the model output across multiple shifted patch-grid "

View File

@ -14,7 +14,8 @@ class CLIPTextEncodeHunyuanDiT(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="CLIPTextEncodeHunyuanDiT", node_id="CLIPTextEncodeHunyuanDiT",
category="advanced/conditioning", display_name="CLIP Text Encode (Hunyuan Image)",
category="model/conditioning/hunyuan image",
inputs=[ inputs=[
io.Clip.Input("clip"), io.Clip.Input("clip"),
io.String.Input("bert", multiline=True, dynamic_prompts=True), io.String.Input("bert", multiline=True, dynamic_prompts=True),
@ -41,7 +42,7 @@ class EmptyHunyuanLatentVideo(io.ComfyNode):
return io.Schema( return io.Schema(
node_id="EmptyHunyuanLatentVideo", node_id="EmptyHunyuanLatentVideo",
display_name="Empty HunyuanVideo 1.0 Latent", display_name="Empty HunyuanVideo 1.0 Latent",
category="model/latent/video", category="model/latent/hunyuan video",
inputs=[ inputs=[
io.Int.Input("width", default=848, min=16, max=nodes.MAX_RESOLUTION, step=16), io.Int.Input("width", default=848, min=16, max=nodes.MAX_RESOLUTION, step=16),
io.Int.Input("height", default=480, min=16, max=nodes.MAX_RESOLUTION, step=16), io.Int.Input("height", default=480, min=16, max=nodes.MAX_RESOLUTION, step=16),
@ -67,6 +68,7 @@ class EmptyHunyuanVideo15Latent(EmptyHunyuanLatentVideo):
schema = super().define_schema() schema = super().define_schema()
schema.node_id = "EmptyHunyuanVideo15Latent" schema.node_id = "EmptyHunyuanVideo15Latent"
schema.display_name = "Empty HunyuanVideo 1.5 Latent" schema.display_name = "Empty HunyuanVideo 1.5 Latent"
schema.category = "model/latent/hunyuan video"
return schema return schema
@classmethod @classmethod
@ -81,7 +83,7 @@ class HunyuanVideo15ImageToVideo(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="HunyuanVideo15ImageToVideo", node_id="HunyuanVideo15ImageToVideo",
category="model/conditioning/video_models", category="model/conditioning/hunyuan video",
inputs=[ inputs=[
io.Conditioning.Input("positive"), io.Conditioning.Input("positive"),
io.Conditioning.Input("negative"), io.Conditioning.Input("negative"),
@ -132,7 +134,7 @@ class HunyuanVideo15SuperResolution(io.ComfyNode):
return io.Schema( return io.Schema(
node_id="HunyuanVideo15SuperResolution", node_id="HunyuanVideo15SuperResolution",
display_name="Hunyuan Video 1.5 Super Resolution", display_name="Hunyuan Video 1.5 Super Resolution",
category="model/conditioning/video_models", category="model/conditioning/hunyuan video",
inputs=[ inputs=[
io.Conditioning.Input("positive"), io.Conditioning.Input("positive"),
io.Conditioning.Input("negative"), io.Conditioning.Input("negative"),
@ -227,7 +229,7 @@ class HunyuanVideo15LatentUpscaleWithModel(io.ComfyNode):
return io.Schema( return io.Schema(
node_id="HunyuanVideo15LatentUpscaleWithModel", node_id="HunyuanVideo15LatentUpscaleWithModel",
display_name="Hunyuan Video 15 Latent Upscale With Model", display_name="Hunyuan Video 15 Latent Upscale With Model",
category="model/latent", category="model/latent/hunyuan video",
inputs=[ inputs=[
io.LatentUpscaleModel.Input("model"), io.LatentUpscaleModel.Input("model"),
io.Latent.Input("samples"), io.Latent.Input("samples"),
@ -276,7 +278,7 @@ class TextEncodeHunyuanVideo_ImageToVideo(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="TextEncodeHunyuanVideo_ImageToVideo", node_id="TextEncodeHunyuanVideo_ImageToVideo",
category="advanced/conditioning", category="model/conditioning/hunyuan video",
inputs=[ inputs=[
io.Clip.Input("clip"), io.Clip.Input("clip"),
io.ClipVisionOutput.Input("clip_vision_output"), io.ClipVisionOutput.Input("clip_vision_output"),
@ -308,7 +310,7 @@ class HunyuanImageToVideo(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="HunyuanImageToVideo", node_id="HunyuanImageToVideo",
category="model/conditioning/video_models", category="model/conditioning/hunyuan video",
inputs=[ inputs=[
io.Conditioning.Input("positive"), io.Conditioning.Input("positive"),
io.Vae.Input("vae"), io.Vae.Input("vae"),
@ -359,7 +361,7 @@ class EmptyHunyuanImageLatent(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="EmptyHunyuanImageLatent", node_id="EmptyHunyuanImageLatent",
category="model/latent", category="model/latent/hunyuan image",
inputs=[ inputs=[
io.Int.Input("width", default=2048, min=64, max=nodes.MAX_RESOLUTION, step=32), io.Int.Input("width", default=2048, min=64, max=nodes.MAX_RESOLUTION, step=32),
io.Int.Input("height", default=2048, min=64, max=nodes.MAX_RESOLUTION, step=32), io.Int.Input("height", default=2048, min=64, max=nodes.MAX_RESOLUTION, step=32),
@ -384,7 +386,7 @@ class HunyuanRefinerLatent(io.ComfyNode):
return io.Schema( return io.Schema(
node_id="HunyuanRefinerLatent", node_id="HunyuanRefinerLatent",
display_name="Hunyuan Latent Refiner", display_name="Hunyuan Latent Refiner",
category="model/conditioning/video_models", category="model/conditioning/hunyuan video",
inputs=[ inputs=[
io.Conditioning.Input("positive"), io.Conditioning.Input("positive"),
io.Conditioning.Input("negative"), io.Conditioning.Input("negative"),

View File

@ -12,7 +12,7 @@ class EmptyLatentHunyuan3Dv2(IO.ComfyNode):
def define_schema(cls): def define_schema(cls):
return IO.Schema( return IO.Schema(
node_id="EmptyLatentHunyuan3Dv2", node_id="EmptyLatentHunyuan3Dv2",
category="model/latent/3d", category="model/latent/hunyuan 3d",
inputs=[ inputs=[
IO.Int.Input("resolution", default=3072, min=1, max=8192), IO.Int.Input("resolution", default=3072, min=1, max=8192),
IO.Int.Input("batch_size", default=1, min=1, max=4096, tooltip="The number of latent images in the batch."), IO.Int.Input("batch_size", default=1, min=1, max=4096, tooltip="The number of latent images in the batch."),
@ -35,7 +35,7 @@ class Hunyuan3Dv2Conditioning(IO.ComfyNode):
def define_schema(cls): def define_schema(cls):
return IO.Schema( return IO.Schema(
node_id="Hunyuan3Dv2Conditioning", node_id="Hunyuan3Dv2Conditioning",
category="model/conditioning/3d_models", category="model/conditioning/hunyuan 3d",
inputs=[ inputs=[
IO.ClipVisionOutput.Input("clip_vision_output"), IO.ClipVisionOutput.Input("clip_vision_output"),
], ],
@ -60,7 +60,7 @@ class Hunyuan3Dv2ConditioningMultiView(IO.ComfyNode):
def define_schema(cls): def define_schema(cls):
return IO.Schema( return IO.Schema(
node_id="Hunyuan3Dv2ConditioningMultiView", node_id="Hunyuan3Dv2ConditioningMultiView",
category="model/conditioning/3d_models", category="model/conditioning/hunyuan 3d",
inputs=[ inputs=[
IO.ClipVisionOutput.Input("front", optional=True), IO.ClipVisionOutput.Input("front", optional=True),
IO.ClipVisionOutput.Input("left", optional=True), IO.ClipVisionOutput.Input("left", optional=True),
@ -97,7 +97,7 @@ class VAEDecodeHunyuan3D(IO.ComfyNode):
def define_schema(cls): def define_schema(cls):
return IO.Schema( return IO.Schema(
node_id="VAEDecodeHunyuan3D", node_id="VAEDecodeHunyuan3D",
category="model/latent/3d", category="model/latent/hunyuan 3d",
inputs=[ inputs=[
IO.Latent.Input("samples"), IO.Latent.Input("samples"),
IO.Vae.Input("vae"), IO.Vae.Input("vae"),

View File

@ -38,7 +38,7 @@ class Ideogram4Scheduler(io.ComfyNode):
return io.Schema( return io.Schema(
node_id="Ideogram4Scheduler", node_id="Ideogram4Scheduler",
display_name="Ideogram 4 Scheduler", display_name="Ideogram 4 Scheduler",
category="sampling/custom_sampling/schedulers", category="model/sampling/schedulers",
inputs=[ inputs=[
io.Int.Input("steps", default=20, min=1, max=200), io.Int.Input("steps", default=20, min=1, max=200),
io.Int.Input("width", default=1024, min=256, max=8192, step=16), io.Int.Input("width", default=1024, min=256, max=8192, step=16),

View File

@ -13,7 +13,7 @@ class Kandinsky5ImageToVideo(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="Kandinsky5ImageToVideo", node_id="Kandinsky5ImageToVideo",
category="model/conditioning/video_models", category="model/conditioning/kandinsky",
inputs=[ inputs=[
io.Conditioning.Input("positive"), io.Conditioning.Input("positive"),
io.Conditioning.Input("negative"), io.Conditioning.Input("negative"),
@ -71,7 +71,7 @@ class NormalizeVideoLatentStart(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="NormalizeVideoLatentStart", node_id="NormalizeVideoLatentStart",
category="model/conditioning/video_models", category="model/conditioning",
description="Normalizes the initial frames of a video latent to match the mean and standard deviation of subsequent reference frames. Helps reduce differences between the starting frames and the rest of the video.", description="Normalizes the initial frames of a video latent to match the mean and standard deviation of subsequent reference frames. Helps reduce differences between the starting frames and the rest of the video.",
inputs=[ inputs=[
io.Latent.Input("latent"), io.Latent.Input("latent"),
@ -104,8 +104,9 @@ class CLIPTextEncodeKandinsky5(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="CLIPTextEncodeKandinsky5", node_id="CLIPTextEncodeKandinsky5",
display_name="CLIP Text Encode (Kandinsky 5)",
search_aliases=["kandinsky prompt"], search_aliases=["kandinsky prompt"],
category="advanced/conditioning/kandinsky5", category="model/conditioning/kandinsky",
inputs=[ inputs=[
io.Clip.Input("clip"), io.Clip.Input("clip"),
io.String.Input("clip_l", multiline=True, dynamic_prompts=True), io.String.Input("clip_l", multiline=True, dynamic_prompts=True),

View File

@ -262,6 +262,7 @@ class LatentBatch(io.ComfyNode):
return io.Schema( return io.Schema(
node_id="LatentBatch", node_id="LatentBatch",
search_aliases=["combine latents", "merge latents", "join latents"], search_aliases=["combine latents", "merge latents", "join latents"],
display_name="Batch Latents (DEPRECATED)",
category="model/latent/batch", category="model/latent/batch",
is_deprecated=True, is_deprecated=True,
inputs=[ inputs=[
@ -447,6 +448,7 @@ class ReplaceVideoLatentFrames(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="ReplaceVideoLatentFrames", node_id="ReplaceVideoLatentFrames",
display_name="Replace Video Latent Frames",
category="model/latent/batch", category="model/latent/batch",
inputs=[ inputs=[
io.Latent.Input("destination", tooltip="The destination latent where frames will be replaced."), io.Latent.Input("destination", tooltip="The destination latent where frames will be replaced."),

View File

@ -25,7 +25,7 @@ class GetICLoRAParameters(io.ComfyNode):
display_name="Get IC-LoRA Parameters", display_name="Get IC-LoRA Parameters",
description="Extracts IC-LoRA parameters from the safetensors metadata of a LoRA-loaded " description="Extracts IC-LoRA parameters from the safetensors metadata of a LoRA-loaded "
"model and outputs them for LTXVAddGuide (eg. reference_downscale_factor).", "model and outputs them for LTXVAddGuide (eg. reference_downscale_factor).",
category="model/conditioning/video_models", category="model/conditioning/ltxv",
search_aliases=["ic-lora", "ic lora", "iclora", "downscale factor", "reference downscale"], search_aliases=["ic-lora", "ic lora", "iclora", "downscale factor", "reference downscale"],
inputs=[ inputs=[
io.Model.Input( io.Model.Input(
@ -62,7 +62,7 @@ class EmptyLTXVLatentVideo(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="EmptyLTXVLatentVideo", node_id="EmptyLTXVLatentVideo",
category="model/latent/video/ltxv", category="model/latent/ltxv",
inputs=[ inputs=[
io.Int.Input("width", default=768, min=64, max=nodes.MAX_RESOLUTION, step=32), io.Int.Input("width", default=768, min=64, max=nodes.MAX_RESOLUTION, step=32),
io.Int.Input("height", default=512, min=64, max=nodes.MAX_RESOLUTION, step=32), io.Int.Input("height", default=512, min=64, max=nodes.MAX_RESOLUTION, step=32),
@ -86,7 +86,7 @@ class LTXVImgToVideo(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="LTXVImgToVideo", node_id="LTXVImgToVideo",
category="model/conditioning/video_models", category="model/conditioning/ltxv",
inputs=[ inputs=[
io.Conditioning.Input("positive"), io.Conditioning.Input("positive"),
io.Conditioning.Input("negative"), io.Conditioning.Input("negative"),
@ -131,7 +131,7 @@ class LTXVImgToVideoInplace(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="LTXVImgToVideoInplace", node_id="LTXVImgToVideoInplace",
category="model/conditioning/video_models", category="model/conditioning/ltxv",
inputs=[ inputs=[
io.Vae.Input("vae"), io.Vae.Input("vae"),
io.Image.Input("image"), io.Image.Input("image"),
@ -251,7 +251,7 @@ class LTXVAddGuide(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="LTXVAddGuide", node_id="LTXVAddGuide",
category="model/conditioning/video_models", category="model/conditioning/ltxv",
inputs=[ inputs=[
io.Conditioning.Input("positive"), io.Conditioning.Input("positive"),
io.Conditioning.Input("negative"), io.Conditioning.Input("negative"),
@ -498,7 +498,7 @@ class LTXVCropGuides(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="LTXVCropGuides", node_id="LTXVCropGuides",
category="model/conditioning/video_models", category="model/conditioning/ltxv",
inputs=[ inputs=[
io.Conditioning.Input("positive"), io.Conditioning.Input("positive"),
io.Conditioning.Input("negative"), io.Conditioning.Input("negative"),
@ -542,7 +542,7 @@ class LTXVConditioning(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="LTXVConditioning", node_id="LTXVConditioning",
category="model/conditioning/video_models", category="model/conditioning/ltxv",
inputs=[ inputs=[
io.Conditioning.Input("positive"), io.Conditioning.Input("positive"),
io.Conditioning.Input("negative"), io.Conditioning.Input("negative"),
@ -566,7 +566,7 @@ class ModelSamplingLTXV(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="ModelSamplingLTXV", node_id="ModelSamplingLTXV",
category="advanced/model", category="model/patch/ltxv",
inputs=[ inputs=[
io.Model.Input("model"), io.Model.Input("model"),
io.Float.Input("max_shift", default=2.05, min=0.0, max=100.0, step=0.01), io.Float.Input("max_shift", default=2.05, min=0.0, max=100.0, step=0.01),
@ -746,7 +746,7 @@ class LTXVConcatAVLatent(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="LTXVConcatAVLatent", node_id="LTXVConcatAVLatent",
category="model/latent/video/ltxv", category="model/latent/ltxv",
inputs=[ inputs=[
io.Latent.Input("video_latent"), io.Latent.Input("video_latent"),
io.Latent.Input("audio_latent"), io.Latent.Input("audio_latent"),
@ -781,7 +781,7 @@ class LTXVSeparateAVLatent(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="LTXVSeparateAVLatent", node_id="LTXVSeparateAVLatent",
category="model/latent/video/ltxv", category="model/latent/ltxv",
description="LTXV Separate AV Latent", description="LTXV Separate AV Latent",
inputs=[ inputs=[
io.Latent.Input("av_latent"), io.Latent.Input("av_latent"),
@ -814,7 +814,7 @@ class LTXVReferenceAudio(io.ComfyNode):
return io.Schema( return io.Schema(
node_id="LTXVReferenceAudio", node_id="LTXVReferenceAudio",
display_name="LTXV Reference Audio (ID-LoRA)", display_name="LTXV Reference Audio (ID-LoRA)",
category="model/conditioning/audio", category="model/conditioning/ltxv",
description="Set reference audio for ID-LoRA speaker identity transfer. Encodes a reference audio clip into the conditioning and optionally patches the model with identity guidance (extra forward pass without reference, amplifying the speaker identity effect).", description="Set reference audio for ID-LoRA speaker identity transfer. Encodes a reference audio clip into the conditioning and optionally patches the model with identity guidance (extra forward pass without reference, amplifying the speaker identity effect).",
inputs=[ inputs=[
io.Model.Input("model"), io.Model.Input("model"),

View File

@ -40,7 +40,7 @@ class LTXVAudioVAEEncode(VAEEncodeAudio):
return io.Schema( return io.Schema(
node_id="LTXVAudioVAEEncode", node_id="LTXVAudioVAEEncode",
display_name="LTXV Audio VAE Encode", display_name="LTXV Audio VAE Encode",
category="model/latent/audio", category="model/latent/ltxv",
inputs=[ inputs=[
io.Audio.Input("audio", tooltip="The audio to be encoded."), io.Audio.Input("audio", tooltip="The audio to be encoded."),
io.Vae.Input( io.Vae.Input(
@ -63,7 +63,7 @@ class LTXVAudioVAEDecode(io.ComfyNode):
return io.Schema( return io.Schema(
node_id="LTXVAudioVAEDecode", node_id="LTXVAudioVAEDecode",
display_name="LTXV Audio VAE Decode", display_name="LTXV Audio VAE Decode",
category="model/latent/audio", category="model/latent/ltxv",
inputs=[ inputs=[
io.Latent.Input("samples", tooltip="The latent to be decoded."), io.Latent.Input("samples", tooltip="The latent to be decoded."),
io.Vae.Input( io.Vae.Input(
@ -96,7 +96,7 @@ class LTXVEmptyLatentAudio(io.ComfyNode):
return io.Schema( return io.Schema(
node_id="LTXVEmptyLatentAudio", node_id="LTXVEmptyLatentAudio",
display_name="LTXV Empty Latent Audio", display_name="LTXV Empty Latent Audio",
category="model/latent/audio", category="model/latent/ltxv",
inputs=[ inputs=[
io.Int.Input( io.Int.Input(
"frames_number", "frames_number",
@ -168,9 +168,9 @@ class LTXAVTextEncoderLoader(io.ComfyNode):
def define_schema(cls) -> io.Schema: def define_schema(cls) -> io.Schema:
return io.Schema( return io.Schema(
node_id="LTXAVTextEncoderLoader", node_id="LTXAVTextEncoderLoader",
display_name="LTXV Audio Text Encoder Loader", display_name="Load LTXV Audio Text Encoder",
category="advanced/loaders", category="model/loaders",
description="[Recipes]\n\nltxav: gemma 3 12B", description="Recipes:\nltxav: gemma 3 12B",
inputs=[ inputs=[
io.Combo.Input( io.Combo.Input(
"text_encoder", "text_encoder",

View File

@ -13,7 +13,7 @@ class LTXVLatentUpsampler(IO.ComfyNode):
def define_schema(cls): def define_schema(cls):
return IO.Schema( return IO.Schema(
node_id="LTXVLatentUpsampler", node_id="LTXVLatentUpsampler",
category="model/latent/video", category="model/latent/ltxv",
is_experimental=True, is_experimental=True,
inputs=[ inputs=[
IO.Latent.Input("samples"), IO.Latent.Input("samples"),

View File

@ -9,7 +9,7 @@ class RenormCFG(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="RenormCFG", node_id="RenormCFG",
category="advanced/model", category="model/patch",
inputs=[ inputs=[
io.Model.Input("model"), io.Model.Input("model"),
io.Float.Input("cfg_trunc", default=100, min=0.0, max=100.0, step=0.01, advanced=True), io.Float.Input("cfg_trunc", default=100, min=0.0, max=100.0, step=0.01, advanced=True),
@ -80,8 +80,8 @@ class CLIPTextEncodeLumina2(io.ComfyNode):
return io.Schema( return io.Schema(
node_id="CLIPTextEncodeLumina2", node_id="CLIPTextEncodeLumina2",
search_aliases=["lumina prompt"], search_aliases=["lumina prompt"],
display_name="CLIP Text Encode for Lumina2", display_name="CLIP Text Encode (Lumina 2)",
category="model/conditioning", category="model/conditioning/lumina",
description="Encodes a system prompt and a user prompt using a CLIP model into an embedding " description="Encodes a system prompt and a user prompt using a CLIP model into an embedding "
"that can be used to guide the diffusion model towards generating specific images.", "that can be used to guide the diffusion model towards generating specific images.",
inputs=[ inputs=[

View File

@ -53,6 +53,7 @@ class LatentCompositeMasked(IO.ComfyNode):
return IO.Schema( return IO.Schema(
node_id="LatentCompositeMasked", node_id="LatentCompositeMasked",
search_aliases=["overlay latent", "layer latent", "paste latent", "inpaint latent"], search_aliases=["overlay latent", "layer latent", "paste latent", "inpaint latent"],
display_name="Latent Composite Masked",
category="model/latent", category="model/latent",
inputs=[ inputs=[
IO.Latent.Input("destination"), IO.Latent.Input("destination"),

View File

@ -10,7 +10,7 @@ class EmptyMochiLatentVideo(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="EmptyMochiLatentVideo", node_id="EmptyMochiLatentVideo",
category="model/latent/video", category="model/latent/mochi",
inputs=[ inputs=[
io.Int.Input("width", default=848, min=16, max=nodes.MAX_RESOLUTION, step=16), io.Int.Input("width", default=848, min=16, max=nodes.MAX_RESOLUTION, step=16),
io.Int.Input("height", default=480, min=16, max=nodes.MAX_RESOLUTION, step=16), io.Int.Input("height", default=480, min=16, max=nodes.MAX_RESOLUTION, step=16),

View File

@ -59,7 +59,7 @@ class ModelSamplingDiscrete:
RETURN_TYPES = ("MODEL",) RETURN_TYPES = ("MODEL",)
FUNCTION = "patch" FUNCTION = "patch"
CATEGORY = "advanced/model" CATEGORY = "model/patch"
def patch(self, model, sampling, zsnr): def patch(self, model, sampling, zsnr):
m = model.clone() m = model.clone()
@ -97,7 +97,7 @@ class ModelSamplingStableCascade:
RETURN_TYPES = ("MODEL",) RETURN_TYPES = ("MODEL",)
FUNCTION = "patch" FUNCTION = "patch"
CATEGORY = "advanced/model" CATEGORY = "model/patch/stable cascade"
def patch(self, model, shift): def patch(self, model, shift):
m = model.clone() m = model.clone()
@ -123,7 +123,7 @@ class ModelSamplingSD3:
RETURN_TYPES = ("MODEL",) RETURN_TYPES = ("MODEL",)
FUNCTION = "patch" FUNCTION = "patch"
CATEGORY = "advanced/model" CATEGORY = "model/patch/stable diffusion"
def patch(self, model, shift, multiplier=1000): def patch(self, model, shift, multiplier=1000):
m = model.clone() m = model.clone()
@ -150,6 +150,7 @@ class ModelSamplingAuraFlow(ModelSamplingSD3):
}} }}
FUNCTION = "patch_aura" FUNCTION = "patch_aura"
CATEGORY = "model/patch"
def patch_aura(self, model, shift): def patch_aura(self, model, shift):
return self.patch(model, shift, multiplier=1.0) return self.patch(model, shift, multiplier=1.0)
@ -167,7 +168,7 @@ class ModelSamplingFlux:
RETURN_TYPES = ("MODEL",) RETURN_TYPES = ("MODEL",)
FUNCTION = "patch" FUNCTION = "patch"
CATEGORY = "advanced/model" CATEGORY = "model/patch/flux"
def patch(self, model, max_shift, base_shift, width, height): def patch(self, model, max_shift, base_shift, width, height):
m = model.clone() m = model.clone()
@ -202,7 +203,7 @@ class ModelSamplingContinuousEDM:
RETURN_TYPES = ("MODEL",) RETURN_TYPES = ("MODEL",)
FUNCTION = "patch" FUNCTION = "patch"
CATEGORY = "advanced/model" CATEGORY = "model/patch"
def patch(self, model, sampling, sigma_max, sigma_min): def patch(self, model, sampling, sigma_max, sigma_min):
m = model.clone() m = model.clone()
@ -247,7 +248,7 @@ class ModelSamplingContinuousV:
RETURN_TYPES = ("MODEL",) RETURN_TYPES = ("MODEL",)
FUNCTION = "patch" FUNCTION = "patch"
CATEGORY = "advanced/model" CATEGORY = "model/patch"
def patch(self, model, sampling, sigma_max, sigma_min): def patch(self, model, sampling, sigma_max, sigma_min):
m = model.clone() m = model.clone()
@ -273,7 +274,7 @@ class RescaleCFG:
RETURN_TYPES = ("MODEL",) RETURN_TYPES = ("MODEL",)
FUNCTION = "patch" FUNCTION = "patch"
CATEGORY = "advanced/model" CATEGORY = "model/patch"
def patch(self, model, multiplier): def patch(self, model, multiplier):
def rescale_cfg(args): def rescale_cfg(args):
@ -314,7 +315,7 @@ class ModelNoiseScale:
RETURN_TYPES = ("MODEL",) RETURN_TYPES = ("MODEL",)
FUNCTION = "patch" FUNCTION = "patch"
CATEGORY = "advanced/model" CATEGORY = "model/patch"
def patch(self, model, noise_scale): def patch(self, model, noise_scale):
m = model.clone() m = model.clone()
@ -337,7 +338,7 @@ class ModelComputeDtype:
RETURN_TYPES = ("MODEL",) RETURN_TYPES = ("MODEL",)
FUNCTION = "patch" FUNCTION = "patch"
CATEGORY = "advanced/debug/model" CATEGORY = "advanced/debug"
def patch(self, model, dtype): def patch(self, model, dtype):
m = model.clone() m = model.clone()

View File

@ -21,7 +21,7 @@ class ModelMergeSimple:
RETURN_TYPES = ("MODEL",) RETURN_TYPES = ("MODEL",)
FUNCTION = "merge" FUNCTION = "merge"
CATEGORY = "advanced/model_merging" CATEGORY = "model/merging"
def merge(self, model1, model2, ratio): def merge(self, model1, model2, ratio):
m = model1.clone() m = model1.clone()
@ -40,7 +40,7 @@ class ModelSubtract:
RETURN_TYPES = ("MODEL",) RETURN_TYPES = ("MODEL",)
FUNCTION = "merge" FUNCTION = "merge"
CATEGORY = "advanced/model_merging" CATEGORY = "model/merging"
def merge(self, model1, model2, multiplier): def merge(self, model1, model2, multiplier):
m = model1.clone() m = model1.clone()
@ -58,7 +58,7 @@ class ModelAdd:
RETURN_TYPES = ("MODEL",) RETURN_TYPES = ("MODEL",)
FUNCTION = "merge" FUNCTION = "merge"
CATEGORY = "advanced/model_merging" CATEGORY = "model/merging"
def merge(self, model1, model2): def merge(self, model1, model2):
m = model1.clone() m = model1.clone()
@ -78,7 +78,7 @@ class CLIPMergeSimple:
RETURN_TYPES = ("CLIP",) RETURN_TYPES = ("CLIP",)
FUNCTION = "merge" FUNCTION = "merge"
CATEGORY = "advanced/model_merging" CATEGORY = "model/merging"
def merge(self, clip1, clip2, ratio): def merge(self, clip1, clip2, ratio):
m = clip1.clone() m = clip1.clone()
@ -101,7 +101,7 @@ class CLIPSubtract:
RETURN_TYPES = ("CLIP",) RETURN_TYPES = ("CLIP",)
FUNCTION = "merge" FUNCTION = "merge"
CATEGORY = "advanced/model_merging" CATEGORY = "model/merging"
def merge(self, clip1, clip2, multiplier): def merge(self, clip1, clip2, multiplier):
m = clip1.clone() m = clip1.clone()
@ -123,7 +123,7 @@ class CLIPAdd:
RETURN_TYPES = ("CLIP",) RETURN_TYPES = ("CLIP",)
FUNCTION = "merge" FUNCTION = "merge"
CATEGORY = "advanced/model_merging" CATEGORY = "model/merging"
def merge(self, clip1, clip2): def merge(self, clip1, clip2):
m = clip1.clone() m = clip1.clone()
@ -147,7 +147,7 @@ class ModelMergeBlocks:
RETURN_TYPES = ("MODEL",) RETURN_TYPES = ("MODEL",)
FUNCTION = "merge" FUNCTION = "merge"
CATEGORY = "advanced/model_merging" CATEGORY = "model/merging"
def merge(self, model1, model2, **kwargs): def merge(self, model1, model2, **kwargs):
m = model1.clone() m = model1.clone()
@ -242,7 +242,7 @@ class CheckpointSave:
FUNCTION = "save" FUNCTION = "save"
OUTPUT_NODE = True OUTPUT_NODE = True
CATEGORY = "advanced/model_merging" CATEGORY = "model/merging"
def save(self, model, clip, vae, filename_prefix, prompt=None, extra_pnginfo=None): def save(self, model, clip, vae, filename_prefix, prompt=None, extra_pnginfo=None):
save_checkpoint(model, clip=clip, vae=vae, filename_prefix=filename_prefix, output_dir=self.output_dir, prompt=prompt, extra_pnginfo=extra_pnginfo) save_checkpoint(model, clip=clip, vae=vae, filename_prefix=filename_prefix, output_dir=self.output_dir, prompt=prompt, extra_pnginfo=extra_pnginfo)
@ -261,7 +261,7 @@ class CLIPSave:
FUNCTION = "save" FUNCTION = "save"
OUTPUT_NODE = True OUTPUT_NODE = True
CATEGORY = "advanced/model_merging" CATEGORY = "model/merging"
def save(self, clip, filename_prefix, prompt=None, extra_pnginfo=None): def save(self, clip, filename_prefix, prompt=None, extra_pnginfo=None):
prompt_info = "" prompt_info = ""
@ -318,7 +318,7 @@ class VAESave:
FUNCTION = "save" FUNCTION = "save"
OUTPUT_NODE = True OUTPUT_NODE = True
CATEGORY = "advanced/model_merging" CATEGORY = "model/merging"
def save(self, vae, filename_prefix, prompt=None, extra_pnginfo=None): def save(self, vae, filename_prefix, prompt=None, extra_pnginfo=None):
full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path(filename_prefix, self.output_dir) full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path(filename_prefix, self.output_dir)
@ -353,7 +353,7 @@ class ModelSave:
FUNCTION = "save" FUNCTION = "save"
OUTPUT_NODE = True OUTPUT_NODE = True
CATEGORY = "advanced/model_merging" CATEGORY = "model/merging"
def save(self, model, filename_prefix, prompt=None, extra_pnginfo=None): def save(self, model, filename_prefix, prompt=None, extra_pnginfo=None):
save_checkpoint(model, filename_prefix=filename_prefix, output_dir=self.output_dir, prompt=prompt, extra_pnginfo=extra_pnginfo) save_checkpoint(model, filename_prefix=filename_prefix, output_dir=self.output_dir, prompt=prompt, extra_pnginfo=extra_pnginfo)

View File

@ -1,7 +1,7 @@
import comfy_extras.nodes_model_merging import comfy_extras.nodes_model_merging
class ModelMergeSD1(comfy_extras.nodes_model_merging.ModelMergeBlocks): class ModelMergeSD1(comfy_extras.nodes_model_merging.ModelMergeBlocks):
CATEGORY = "advanced/model_merging/model_specific" CATEGORY = "model/merging/model specific"
@classmethod @classmethod
def INPUT_TYPES(s): def INPUT_TYPES(s):
arg_dict = { "model1": ("MODEL",), arg_dict = { "model1": ("MODEL",),
@ -27,7 +27,7 @@ class ModelMergeSD1(comfy_extras.nodes_model_merging.ModelMergeBlocks):
class ModelMergeSDXL(comfy_extras.nodes_model_merging.ModelMergeBlocks): class ModelMergeSDXL(comfy_extras.nodes_model_merging.ModelMergeBlocks):
CATEGORY = "advanced/model_merging/model_specific" CATEGORY = "model/merging/model specific"
@classmethod @classmethod
def INPUT_TYPES(s): def INPUT_TYPES(s):
@ -53,7 +53,7 @@ class ModelMergeSDXL(comfy_extras.nodes_model_merging.ModelMergeBlocks):
return {"required": arg_dict} return {"required": arg_dict}
class ModelMergeSD3_2B(comfy_extras.nodes_model_merging.ModelMergeBlocks): class ModelMergeSD3_2B(comfy_extras.nodes_model_merging.ModelMergeBlocks):
CATEGORY = "advanced/model_merging/model_specific" CATEGORY = "model/merging/model specific"
@classmethod @classmethod
def INPUT_TYPES(s): def INPUT_TYPES(s):
@ -77,7 +77,7 @@ class ModelMergeSD3_2B(comfy_extras.nodes_model_merging.ModelMergeBlocks):
class ModelMergeAuraflow(comfy_extras.nodes_model_merging.ModelMergeBlocks): class ModelMergeAuraflow(comfy_extras.nodes_model_merging.ModelMergeBlocks):
CATEGORY = "advanced/model_merging/model_specific" CATEGORY = "model/merging/model specific"
@classmethod @classmethod
def INPUT_TYPES(s): def INPUT_TYPES(s):
@ -104,7 +104,7 @@ class ModelMergeAuraflow(comfy_extras.nodes_model_merging.ModelMergeBlocks):
return {"required": arg_dict} return {"required": arg_dict}
class ModelMergeFlux1(comfy_extras.nodes_model_merging.ModelMergeBlocks): class ModelMergeFlux1(comfy_extras.nodes_model_merging.ModelMergeBlocks):
CATEGORY = "advanced/model_merging/model_specific" CATEGORY = "model/merging/model specific"
@classmethod @classmethod
def INPUT_TYPES(s): def INPUT_TYPES(s):
@ -130,7 +130,7 @@ class ModelMergeFlux1(comfy_extras.nodes_model_merging.ModelMergeBlocks):
return {"required": arg_dict} return {"required": arg_dict}
class ModelMergeSD35_Large(comfy_extras.nodes_model_merging.ModelMergeBlocks): class ModelMergeSD35_Large(comfy_extras.nodes_model_merging.ModelMergeBlocks):
CATEGORY = "advanced/model_merging/model_specific" CATEGORY = "model/merging/model specific"
@classmethod @classmethod
def INPUT_TYPES(s): def INPUT_TYPES(s):
@ -153,7 +153,7 @@ class ModelMergeSD35_Large(comfy_extras.nodes_model_merging.ModelMergeBlocks):
return {"required": arg_dict} return {"required": arg_dict}
class ModelMergeMochiPreview(comfy_extras.nodes_model_merging.ModelMergeBlocks): class ModelMergeMochiPreview(comfy_extras.nodes_model_merging.ModelMergeBlocks):
CATEGORY = "advanced/model_merging/model_specific" CATEGORY = "model/merging/model specific"
@classmethod @classmethod
def INPUT_TYPES(s): def INPUT_TYPES(s):
@ -175,7 +175,7 @@ class ModelMergeMochiPreview(comfy_extras.nodes_model_merging.ModelMergeBlocks):
return {"required": arg_dict} return {"required": arg_dict}
class ModelMergeLTXV(comfy_extras.nodes_model_merging.ModelMergeBlocks): class ModelMergeLTXV(comfy_extras.nodes_model_merging.ModelMergeBlocks):
CATEGORY = "advanced/model_merging/model_specific" CATEGORY = "model/merging/model specific"
@classmethod @classmethod
def INPUT_TYPES(s): def INPUT_TYPES(s):
@ -197,7 +197,7 @@ class ModelMergeLTXV(comfy_extras.nodes_model_merging.ModelMergeBlocks):
return {"required": arg_dict} return {"required": arg_dict}
class ModelMergeCosmos7B(comfy_extras.nodes_model_merging.ModelMergeBlocks): class ModelMergeCosmos7B(comfy_extras.nodes_model_merging.ModelMergeBlocks):
CATEGORY = "advanced/model_merging/model_specific" CATEGORY = "model/merging/model specific"
@classmethod @classmethod
def INPUT_TYPES(s): def INPUT_TYPES(s):
@ -221,7 +221,7 @@ class ModelMergeCosmos7B(comfy_extras.nodes_model_merging.ModelMergeBlocks):
return {"required": arg_dict} return {"required": arg_dict}
class ModelMergeCosmos14B(comfy_extras.nodes_model_merging.ModelMergeBlocks): class ModelMergeCosmos14B(comfy_extras.nodes_model_merging.ModelMergeBlocks):
CATEGORY = "advanced/model_merging/model_specific" CATEGORY = "model/merging/model specific"
@classmethod @classmethod
def INPUT_TYPES(s): def INPUT_TYPES(s):
@ -245,7 +245,7 @@ class ModelMergeCosmos14B(comfy_extras.nodes_model_merging.ModelMergeBlocks):
return {"required": arg_dict} return {"required": arg_dict}
class ModelMergeWAN2_1(comfy_extras.nodes_model_merging.ModelMergeBlocks): class ModelMergeWAN2_1(comfy_extras.nodes_model_merging.ModelMergeBlocks):
CATEGORY = "advanced/model_merging/model_specific" CATEGORY = "model/merging/model specific"
DESCRIPTION = "1.3B model has 30 blocks, 14B model has 40 blocks. Image to video model has the extra img_emb." DESCRIPTION = "1.3B model has 30 blocks, 14B model has 40 blocks. Image to video model has the extra img_emb."
@classmethod @classmethod
@ -269,7 +269,7 @@ class ModelMergeWAN2_1(comfy_extras.nodes_model_merging.ModelMergeBlocks):
return {"required": arg_dict} return {"required": arg_dict}
class ModelMergeCosmosPredict2_2B(comfy_extras.nodes_model_merging.ModelMergeBlocks): class ModelMergeCosmosPredict2_2B(comfy_extras.nodes_model_merging.ModelMergeBlocks):
CATEGORY = "advanced/model_merging/model_specific" CATEGORY = "model/merging/model specific"
@classmethod @classmethod
def INPUT_TYPES(s): def INPUT_TYPES(s):
@ -292,7 +292,7 @@ class ModelMergeCosmosPredict2_2B(comfy_extras.nodes_model_merging.ModelMergeBlo
return {"required": arg_dict} return {"required": arg_dict}
class ModelMergeCosmosPredict2_14B(comfy_extras.nodes_model_merging.ModelMergeBlocks): class ModelMergeCosmosPredict2_14B(comfy_extras.nodes_model_merging.ModelMergeBlocks):
CATEGORY = "advanced/model_merging/model_specific" CATEGORY = "model/merging/model specific"
@classmethod @classmethod
def INPUT_TYPES(s): def INPUT_TYPES(s):
@ -315,7 +315,7 @@ class ModelMergeCosmosPredict2_14B(comfy_extras.nodes_model_merging.ModelMergeBl
return {"required": arg_dict} return {"required": arg_dict}
class ModelMergeQwenImage(comfy_extras.nodes_model_merging.ModelMergeBlocks): class ModelMergeQwenImage(comfy_extras.nodes_model_merging.ModelMergeBlocks):
CATEGORY = "advanced/model_merging/model_specific" CATEGORY = "model/merging/model specific"
@classmethod @classmethod
def INPUT_TYPES(s): def INPUT_TYPES(s):

View File

@ -232,7 +232,7 @@ class ModelPatchLoader:
FUNCTION = "load_model_patch" FUNCTION = "load_model_patch"
EXPERIMENTAL = True EXPERIMENTAL = True
CATEGORY = "advanced/loaders" CATEGORY = "model/loaders"
def load_model_patch(self, name): def load_model_patch(self, name):
model_patch_path = folder_paths.get_full_path_or_raise("model_patches", name) model_patch_path = folder_paths.get_full_path_or_raise("model_patches", name)
@ -479,7 +479,7 @@ class QwenImageDiffsynthControlnet:
FUNCTION = "diffsynth_controlnet" FUNCTION = "diffsynth_controlnet"
EXPERIMENTAL = True EXPERIMENTAL = True
CATEGORY = "advanced/loaders/qwen" CATEGORY = "model/patch/qwen"
def diffsynth_controlnet(self, model, model_patch, vae, image=None, strength=1.0, inpaint_image=None, mask=None): def diffsynth_controlnet(self, model, model_patch, vae, image=None, strength=1.0, inpaint_image=None, mask=None):
model_patched = model.clone() model_patched = model.clone()
@ -512,7 +512,7 @@ class ZImageFunControlnet(QwenImageDiffsynthControlnet):
}, },
"optional": {"image": ("IMAGE",), "inpaint_image": ("IMAGE",), "mask": ("MASK",)}} "optional": {"image": ("IMAGE",), "inpaint_image": ("IMAGE",), "mask": ("MASK",)}}
CATEGORY = "advanced/loaders/zimage" CATEGORY = "model/patch/z-image"
class UsoStyleProjectorPatch: class UsoStyleProjectorPatch:
def __init__(self, model_patch, encoded_image): def __init__(self, model_patch, encoded_image):
@ -675,3 +675,11 @@ NODE_CLASS_MAPPINGS = {
"USOStyleReference": USOStyleReference, "USOStyleReference": USOStyleReference,
"SUPIRApply": SUPIRApply, "SUPIRApply": SUPIRApply,
} }
NODE_DISPLAY_NAME_MAPPINGS = {
"ModelPatchLoader": "Load Model Patch",
"QwenImageDiffsynthControlnet": "Apply Qwen Image DiffSynth ControlNet",
"ZImageFunControlnet": "Apply Z-Image Fun ControlNet",
"USOStyleReference": "Apply USO Style Reference",
"SUPIRApply": "Apply SUPIR Patch",
}

View File

@ -14,10 +14,8 @@ class PiDConditioning(io.ComfyNode):
return io.Schema( return io.Schema(
node_id="PiDConditioning", node_id="PiDConditioning",
display_name="PiD Conditioning", display_name="PiD Conditioning",
category="advanced/conditioning", category="model/conditioning",
description=( description=("Attaches a latent and a degrade_sigma scalar to a CONDITIONING for PiD decoding/upscaling"),
"Attaches a latent and a degrade_sigma scalar to a CONDITIONING for PiD decoding/upscaling"
),
inputs=[ inputs=[
io.Conditioning.Input("positive"), io.Conditioning.Input("positive"),
io.Latent.Input("latent", tooltip="latent (from VAEEncode or a KSampler)."), io.Latent.Input("latent", tooltip="latent (from VAEEncode or a KSampler)."),

View File

@ -7,8 +7,9 @@ class CLIPTextEncodePixArtAlpha(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="CLIPTextEncodePixArtAlpha", node_id="CLIPTextEncodePixArtAlpha",
display_name="CLIP Text Encode (PixArt Alpha)",
search_aliases=["pixart prompt"], search_aliases=["pixart prompt"],
category="advanced/conditioning", category="model/conditioning/pixart",
description="Encodes text and sets the resolution conditioning for PixArt Alpha. Does not apply to PixArt Sigma.", description="Encodes text and sets the resolution conditioning for PixArt Alpha. Does not apply to PixArt Sigma.",
inputs=[ inputs=[
io.Int.Input("width", default=1024, min=0, max=nodes.MAX_RESOLUTION), io.Int.Input("width", default=1024, min=0, max=nodes.MAX_RESOLUTION),

View File

@ -616,7 +616,7 @@ class BatchLatentsNode(io.ComfyNode):
node_id="BatchLatentsNode", node_id="BatchLatentsNode",
search_aliases=["combine latents", "stack latents", "merge latents"], search_aliases=["combine latents", "stack latents", "merge latents"],
display_name="Batch Latents", display_name="Batch Latents",
category="model/latent", category="model/latent/batch",
inputs=[ inputs=[
io.Autogrow.Input("latents", template=autogrow_template) io.Autogrow.Input("latents", template=autogrow_template)
], ],

View File

@ -12,7 +12,7 @@ class TextEncodeQwenImageEdit(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="TextEncodeQwenImageEdit", node_id="TextEncodeQwenImageEdit",
category="advanced/conditioning", category="model/conditioning/qwen image",
inputs=[ inputs=[
io.Clip.Input("clip"), io.Clip.Input("clip"),
io.String.Input("prompt", multiline=True, dynamic_prompts=True), io.String.Input("prompt", multiline=True, dynamic_prompts=True),
@ -55,7 +55,7 @@ class TextEncodeQwenImageEditPlus(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="TextEncodeQwenImageEditPlus", node_id="TextEncodeQwenImageEditPlus",
category="advanced/conditioning", category="model/conditioning/qwen image",
inputs=[ inputs=[
io.Clip.Input("clip"), io.Clip.Input("clip"),
io.String.Input("prompt", multiline=True, dynamic_prompts=True), io.String.Input("prompt", multiline=True, dynamic_prompts=True),

View File

@ -14,7 +14,7 @@ class RTDETR_detect(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="RTDETR_detect", node_id="RTDETR_detect",
display_name="RT-DETR Detect", display_name="Run Real-Time Detection (RT-DETR)",
category="image/detection", category="image/detection",
search_aliases=["bbox", "bounding box", "object detection", "coco"], search_aliases=["bbox", "bounding box", "object detection", "coco"],
inputs=[ inputs=[

View File

@ -264,7 +264,7 @@ class SAM3_VideoTrack(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="SAM3_VideoTrack", node_id="SAM3_VideoTrack",
display_name="SAM3 Video Track", display_name="Run SAM3 Video Track",
category="image/detection", category="image/detection",
search_aliases=["sam3", "video", "track", "propagate"], search_aliases=["sam3", "video", "track", "propagate"],
inputs=[ inputs=[

View File

@ -34,14 +34,20 @@ def _unpack(track_data):
return unpack_masks(packed) return unpack_masks(packed)
def _first_frame_cx_area(masks_bool): def _first_appearance_cx_area(masks_bool):
first = masks_bool[0].float() """Per object: first frame it appears in, plus centroid-x and area in that frame."""
H, W = first.shape[-2], first.shape[-1] m = masks_bool.float()
n_pixels = H * W T, H, W = m.shape[0], m.shape[-2], m.shape[-1]
grid_x = torch.arange(W, device=first.device, dtype=first.dtype).view(1, W) grid_x = torch.arange(W, device=m.device, dtype=m.dtype).view(1, 1, 1, W)
area = first.sum(dim=(-1, -2)).clamp_(min=1) area_t = m.sum(dim=(-1, -2))
cx = (first * grid_x).sum(dim=(-1, -2)) / area cx_t = (m * grid_x).sum(dim=(-1, -2)) / area_t.clamp(min=1)
return (cx / W).tolist(), (area / n_pixels).tolist() present = area_t > 0
frame_idx = torch.arange(T, device=m.device).unsqueeze(1)
first_t = torch.where(present, frame_idx, T).amin(dim=0)
sel = first_t.clamp(max=T - 1).unsqueeze(0)
cx = cx_t.gather(0, sel).squeeze(0)
area = area_t.gather(0, sel).squeeze(0)
return first_t.tolist(), (cx / W).tolist(), (area / (H * W)).tolist()
def _subset_track_data(track_data, obj_indices): def _subset_track_data(track_data, obj_indices):
@ -81,12 +87,26 @@ def _render_colored_masks(track_data, background="black"):
masks_full.view(T * N_obj, 1, Hm, Wm), size=(H, W), mode="nearest" masks_full.view(T * N_obj, 1, Hm, Wm), size=(H, W), mode="nearest"
).view(T, N_obj, H, W) > 0.5 ).view(T, N_obj, H, W) > 0.5
any_mask = masks_full.any(dim=1) any_mask = masks_full.any(dim=1)
obj_idx_map = masks_full.to(torch.uint8).argmax(dim=1) color_overlay = colors[masks_full.to(torch.uint8).argmax(dim=1)]
color_overlay = colors[obj_idx_map]
bg_tensor = torch.tensor(bg_rgb, device=device, dtype=color_overlay.dtype).view(1, 1, 1, 3) bg_tensor = torch.tensor(bg_rgb, device=device, dtype=color_overlay.dtype).view(1, 1, 1, 3)
return torch.where(any_mask.unsqueeze(-1), color_overlay, bg_tensor.expand_as(color_overlay)) return torch.where(any_mask.unsqueeze(-1), color_overlay, bg_tensor.expand_as(color_overlay))
def _render_mask_as_identity(mask, background="black"):
    """Paint a plain comfy MASK as a single identity color over a solid background.

    Args:
        mask: Tensor shaped (B, H, W) or (H, W). A 2-D mask gets a leading
            batch axis of size 1. A batch is treated as multiple views of
            the same subject, so every view uses the same color.
        background: Background name. Any value starting with "white" gives a
            white background; every other value gives black.

    Returns:
        Tensor shaped (B, H, W, 3), on the intermediate device/dtype reported
        by comfy.model_management. Pixels where mask > 0.5 take
        DEFAULT_PALETTE[0]; all other pixels take the background color.
    """
    target_device = comfy.model_management.intermediate_device()
    target_dtype = comfy.model_management.intermediate_dtype()

    # Promote a single (H, W) mask to a batch of one before moving it.
    batched = mask.unsqueeze(0) if mask.ndim == 2 else mask
    batched = batched.to(device=target_device, dtype=target_dtype)
    n_views, height, width = batched.shape

    if background.startswith("white"):
        bg_rgb = (1.0, 1.0, 1.0)
    else:
        bg_rgb = (0.0, 0.0, 0.0)

    # Both colors are built as (1, 1, 1, 3) and broadcast to the full output shape.
    identity_rgb = torch.tensor(DEFAULT_PALETTE[0], device=target_device, dtype=target_dtype).view(1, 1, 1, 3)
    backdrop_rgb = torch.tensor(bg_rgb, device=target_device, dtype=target_dtype).view(1, 1, 1, 3)

    inside = (batched > 0.5).unsqueeze(-1)
    return torch.where(
        inside,
        identity_rgb.expand(n_views, height, width, 3),
        backdrop_rgb.expand(n_views, height, width, 3),
    )
def _extract_mask_to_28ch(rgb_video): def _extract_mask_to_28ch(rgb_video):
"""Colored RGB mask (T, H, W, 3) in [0, 1] -> SCAIL-2 28-channel binary latent """Colored RGB mask (T, H, W, 3) in [0, 1] -> SCAIL-2 28-channel binary latent
(1, T_lat, 28, H_lat, W_lat). 7 per-color binary channels (white/r/g/b/y/m/c) (1, T_lat, 28, H_lat, W_lat). 7 per-color binary channels (white/r/g/b/y/m/c)
@ -123,7 +143,7 @@ class WanSCAILToVideo(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="WanSCAILToVideo", node_id="WanSCAILToVideo",
category="model/conditioning/video_models", category="model/conditioning/wan/scail",
inputs=[ inputs=[
io.Conditioning.Input("positive"), io.Conditioning.Input("positive"),
io.Conditioning.Input("negative"), io.Conditioning.Input("negative"),
@ -138,8 +158,8 @@ class WanSCAILToVideo(io.ComfyNode):
io.Float.Input("pose_strength", default=1.0, min=0.0, max=10.0, step=0.01, tooltip="Strength of the pose latent."), io.Float.Input("pose_strength", default=1.0, min=0.0, max=10.0, step=0.01, tooltip="Strength of the pose latent."),
io.Float.Input("pose_start", default=0.0, min=0.0, max=1.0, step=0.01, tooltip="Start step of the pose conditioning."), io.Float.Input("pose_start", default=0.0, min=0.0, max=1.0, step=0.01, tooltip="Start step of the pose conditioning."),
io.Float.Input("pose_end", default=1.0, min=0.0, max=1.0, step=0.01, tooltip="End step of the pose conditioning."), io.Float.Input("pose_end", default=1.0, min=0.0, max=1.0, step=0.01, tooltip="End step of the pose conditioning."),
io.Image.Input("reference_image", optional=True, tooltip="Reference image, for multiple references composite all on single image."), io.Image.Input("reference_image", optional=True, tooltip="Reference image. The first image is the primary reference (composite all identities onto it). SCAIL-2: extra batch images are used as additional views (back view, close-up, occluded background), each needing a matching reference_image_mask in that identity's color."),
io.Image.Input("reference_image_mask", optional=True, tooltip="SCAIL-2 only. Colored reference mask at the same resolution as reference_image."), io.Image.Input("reference_image_mask", optional=True, tooltip="SCAIL-2 only. Colored reference mask, batch matching reference_image (first = primary reference mask, rest = identity masks for the additional reference_image)."),
io.ClipVisionOutput.Input("clip_vision_output", optional=True, tooltip="CLIP vision features for conditioning. Model is trained with stretch resize to aspect ratio."), io.ClipVisionOutput.Input("clip_vision_output", optional=True, tooltip="CLIP vision features for conditioning. Model is trained with stretch resize to aspect ratio."),
io.Int.Input("video_frame_offset", default=0, min=0, max=nodes.MAX_RESOLUTION, step=1, tooltip="Cumulative output frame this chunk begins at. Wire from the previous chunk's video_frame_offset output."), io.Int.Input("video_frame_offset", default=0, min=0, max=nodes.MAX_RESOLUTION, step=1, tooltip="Cumulative output frame this chunk begins at. Wire from the previous chunk's video_frame_offset output."),
io.Int.Input("previous_frame_count", default=5, min=1, max=nodes.MAX_RESOLUTION, step=4, tooltip="Tail frames of previous_frames to anchor. SCAIL-2 trained at 5 (81-frame chunks, 76-frame step)."), io.Int.Input("previous_frame_count", default=5, min=1, max=nodes.MAX_RESOLUTION, step=4, tooltip="Tail frames of previous_frames to anchor. SCAIL-2 trained at 5 (81-frame chunks, 76-frame step)."),
@ -171,19 +191,21 @@ class WanSCAILToVideo(io.ComfyNode):
video_frame_offset -= prev_trimmed.shape[0] video_frame_offset -= prev_trimmed.shape[0]
video_frame_offset = max(0, video_frame_offset) video_frame_offset = max(0, video_frame_offset)
ref_latent = None
if reference_image is not None: if reference_image is not None:
reference_image = comfy.utils.common_upscale(reference_image[:1].movedim(-1, 1), width, height, "bicubic", "center").movedim(1, -1) ref_imgs = comfy.utils.common_upscale(reference_image.movedim(-1, 1), width, height, "bicubic", "center").movedim(1, -1)
# Replacement Mode: composite ref on black bg using reference_image_mask as alpha matte n_ref = ref_imgs.shape[0]
if replacement_mode and reference_image_mask is not None: # SCAIL-2 multi-reference: the first image is the primary ref, the rest are additional references.
rm = comfy.utils.common_upscale(reference_image_mask[:1].movedim(-1, 1), width, height, "nearest-exact", "center").movedim(1, -1)
is_char = (rm[..., :3].max(dim=-1, keepdim=True).values > 0.1).to(reference_image.dtype)
reference_image = reference_image * is_char
ref_latent = vae.encode(reference_image[:, :, :, :3])
if ref_latent is not None: # Replacement Mode: composite each ref on black bg using its mask as alpha matte
positive = node_helpers.conditioning_set_values(positive, {"reference_latents": [ref_latent]}, append=True) if replacement_mode and reference_image_mask is not None:
negative = node_helpers.conditioning_set_values(negative, {"reference_latents": [ref_latent]}, append=True) rm = comfy.utils.common_upscale(reference_image_mask.movedim(-1, 1), width, height, "nearest-exact", "center").movedim(1, -1)
rm = rm[[min(i, rm.shape[0] - 1) for i in range(n_ref)]]
is_char = (rm[..., :3].max(dim=-1, keepdim=True).values > 0.1).to(ref_imgs.dtype)
ref_imgs = ref_imgs * is_char
# encode each ref individually so each stays a single latent frame (a batched encode would be treated as a video)
ref_latents = [vae.encode(ref_imgs[i:i + 1, :, :, :3]) for i in range(n_ref)]
positive = node_helpers.conditioning_set_values(positive, {"reference_latents": ref_latents}, append=True)
negative = node_helpers.conditioning_set_values(negative, {"reference_latents": ref_latents}, append=True)
if clip_vision_output is not None: if clip_vision_output is not None:
positive = node_helpers.conditioning_set_values(positive, {"clip_vision_output": clip_vision_output}) positive = node_helpers.conditioning_set_values(positive, {"clip_vision_output": clip_vision_output})
@ -221,11 +243,16 @@ class WanSCAILToVideo(io.ComfyNode):
positive = node_helpers.conditioning_set_values(positive, {"driving_mask_28ch": driving_mask_28ch}) positive = node_helpers.conditioning_set_values(positive, {"driving_mask_28ch": driving_mask_28ch})
negative = node_helpers.conditioning_set_values(negative, {"driving_mask_28ch": driving_mask_28ch}) negative = node_helpers.conditioning_set_values(negative, {"driving_mask_28ch": driving_mask_28ch})
if reference_image_mask is not None: # The ref mask binds reference frames to identities, so it only applies when there's a reference image.
ref_mask_hw = comfy.utils.common_upscale(reference_image_mask[:1].movedim(-1, 1), width, height, "bicubic", "center").movedim(1, -1) if reference_image_mask is not None and reference_image is not None:
ref_mask_1f = _extract_mask_to_28ch(ref_mask_hw) ref_mask_hw = comfy.utils.common_upscale(reference_image_mask.movedim(-1, 1), width, height, "nearest-exact", "center").movedim(1, -1)
n_masks = ref_mask_hw.shape[0]
n_ref = reference_image.shape[0]
add_masks = [_extract_mask_to_28ch(ref_mask_hw[min(i, n_masks - 1)][None]) for i in range(1, n_ref)]
ref_mask_1f = _extract_mask_to_28ch(ref_mask_hw[:1])
zeros = torch.zeros((1, latent.shape[2], 28, ref_mask_1f.shape[-2], ref_mask_1f.shape[-1]), device=ref_mask_1f.device, dtype=ref_mask_1f.dtype) zeros = torch.zeros((1, latent.shape[2], 28, ref_mask_1f.shape[-2], ref_mask_1f.shape[-1]), device=ref_mask_1f.device, dtype=ref_mask_1f.dtype)
ref_mask_28ch = torch.cat([ref_mask_1f, zeros], dim=1) ref_mask_28ch = torch.cat(add_masks + [ref_mask_1f, zeros], dim=1)
positive = node_helpers.conditioning_set_values(positive, {"ref_mask_28ch": ref_mask_28ch}) positive = node_helpers.conditioning_set_values(positive, {"ref_mask_28ch": ref_mask_28ch})
negative = node_helpers.conditioning_set_values(negative, {"ref_mask_28ch": ref_mask_28ch}) negative = node_helpers.conditioning_set_values(negative, {"ref_mask_28ch": ref_mask_28ch})
@ -244,12 +271,9 @@ class WanSCAILToVideo(io.ComfyNode):
class SCAIL2ColoredMask(io.ComfyNode): class SCAIL2ColoredMask(io.ComfyNode):
"""Render SAM3 tracks for the driving pose video and (optionally) the reference """Render SAM3 tracks for the driving pose video and reference image(s) into the
image into the two colored masks WanSCAILToVideo consumes. Shared `sort_by` colored masks WanSCAILToVideo consumes. Shared `sort_by` keeps each identity on the
across both outputs guarantees identity K maps to the same color on both same color across both outputs.
sides, for multi-person workflow consistency.
reference_image_mask is always rendered black-bg (model convention)
pose_video_mask bg follows replacement_mode: black = Animation Mode, white = Replacement Mode
""" """
@classmethod @classmethod
@ -257,18 +281,18 @@ class SCAIL2ColoredMask(io.ComfyNode):
return io.Schema( return io.Schema(
node_id="SCAIL2ColoredMask", node_id="SCAIL2ColoredMask",
display_name="Create SCAIL-2 Colored Mask", display_name="Create SCAIL-2 Colored Mask",
category="conditioning/video_models/scail", category="model/conditioning/wan/scail",
inputs=[ inputs=[
SAM3TrackData.Input("driving_track_data", tooltip="SAM3 track of the driving pose video. Will be rendered into the pose_video_mask output."), SAM3TrackData.Input("driving_track_data", tooltip="SAM3 track of the driving pose video. Will be rendered into the pose_video_mask output."),
SAM3TrackData.Input("ref_track_data", optional=True, io.MultiType.Input("ref_track_data", [SAM3TrackData, io.Mask], optional=True, display_name="reference_masks",
tooltip="SAM3 track of the reference image."), tooltip="SAM3 track of the reference image(s) (one identity per object, colored in batch order), or a plain MASK of the reference subject (rendered as a single identity)."),
io.String.Input("object_indices", default="", io.String.Input("object_indices", default="",
tooltip="Comma-separated list of person indices to include (e.g. '0,2,3'). Applied to both reference and pose video masks. Empty = all."), tooltip="Comma-separated list of person indices to include (e.g. '0,2,3'). Applied to both reference and pose video masks. Empty = all."),
io.Combo.Input("sort_by", options=["none", "left_to_right", "area"], default="left_to_right", io.Combo.Input("sort_by", options=["none", "left_to_right", "area"], default="left_to_right",
tooltip="Order in which palette colors are assigned to the tracked objects (applied to both reference and pose video so each identity keeps the same color). left_to_right = leftmost object (by first-frame centroid) gets the first color; area = biggest object (by first-frame mask area) gets the first color; none = keep SAM3's order."), tooltip="Order in which palette colors are assigned to the tracked objects (applied to both reference and pose video so each identity keeps the same color). Objects that appear in earlier frames always come first; within a frame, left_to_right = leftmost object (by centroid at first appearance) gets the first color, area = biggest object (by mask area at first appearance) gets the first color; none = keep SAM3's order."),
io.Boolean.Input("replacement_mode", default=False, io.Boolean.Input("replacement_mode", default=False,
tooltip="False = Animation Mode (pose_video_mask has black background, reference_image_mask has white background). " tooltip="False = Animation Mode (pose_video_mask has black background, reference_image_mask has white background). "
"True = Replacement Mode (pose_video_mask has white background, reference_image_mask has black background)."), "True = Replacement Mode (pose_video_mask has white background, reference_image_mask has black background)."),
], ],
outputs=[ outputs=[
io.Image.Output("pose_video_mask"), io.Image.Output("pose_video_mask"),
@ -282,11 +306,11 @@ class SCAIL2ColoredMask(io.ComfyNode):
def _prep(td): def _prep(td):
masks_bool = _unpack(td) masks_bool = _unpack(td)
if sort_by != "none" and masks_bool is not None: if sort_by != "none" and masks_bool is not None:
cx, area = _first_frame_cx_area(masks_bool) first_t, cx, area = _first_appearance_cx_area(masks_bool)
if sort_by == "left_to_right": if sort_by == "left_to_right":
order = sorted(range(len(cx)), key=lambda i: cx[i]) order = sorted(range(len(cx)), key=lambda i: (first_t[i], cx[i]))
else: # "area" else: # "area"
order = sorted(range(len(area)), key=lambda i: -area[i]) order = sorted(range(len(area)), key=lambda i: (first_t[i], -area[i]))
td = _subset_track_data(td, order) td = _subset_track_data(td, order)
if object_indices.strip(): if object_indices.strip():
indices = [int(i.strip()) for i in object_indices.split(",") if i.strip().isdigit()] indices = [int(i.strip()) for i in object_indices.split(",") if i.strip().isdigit()]
@ -302,8 +326,10 @@ class SCAIL2ColoredMask(io.ComfyNode):
ref_bg = "black" if replacement_mode else "white" ref_bg = "black" if replacement_mode else "white"
if ref_track_data is not None: if ref_track_data is not None:
ref = _prep(ref_track_data) if isinstance(ref_track_data, torch.Tensor): # plain comfy MASK
reference_image_mask = _render_colored_masks(ref, ref_bg) reference_image_mask = _render_mask_as_identity(ref_track_data, ref_bg)
else:
reference_image_mask = _render_colored_masks(_prep(ref_track_data), ref_bg)
else: else:
H, W = drv["orig_size"] H, W = drv["orig_size"]
fill_value = 1.0 if ref_bg == "white" else 0.0 fill_value = 1.0 if ref_bg == "white" else 0.0

View File

@ -13,8 +13,9 @@ class TripleCLIPLoader(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="TripleCLIPLoader", node_id="TripleCLIPLoader",
category="advanced/loaders", display_name="Load CLIP (Triple)",
description="[Recipes]\n\nsd3: clip-l, clip-g, t5", category="model/loaders",
description="Recipes:\nsd3: clip-l, clip-g, t5",
inputs=[ inputs=[
io.Combo.Input("clip_name1", options=folder_paths.get_filename_list("text_encoders")), io.Combo.Input("clip_name1", options=folder_paths.get_filename_list("text_encoders")),
io.Combo.Input("clip_name2", options=folder_paths.get_filename_list("text_encoders")), io.Combo.Input("clip_name2", options=folder_paths.get_filename_list("text_encoders")),
@ -41,7 +42,7 @@ class EmptySD3LatentImage(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="EmptySD3LatentImage", node_id="EmptySD3LatentImage",
category="model/latent/sd3", category="model/latent/stable diffusion",
inputs=[ inputs=[
io.Int.Input("width", default=1024, min=16, max=nodes.MAX_RESOLUTION, step=16), io.Int.Input("width", default=1024, min=16, max=nodes.MAX_RESOLUTION, step=16),
io.Int.Input("height", default=1024, min=16, max=nodes.MAX_RESOLUTION, step=16), io.Int.Input("height", default=1024, min=16, max=nodes.MAX_RESOLUTION, step=16),
@ -66,7 +67,8 @@ class CLIPTextEncodeSD3(io.ComfyNode):
return io.Schema( return io.Schema(
node_id="CLIPTextEncodeSD3", node_id="CLIPTextEncodeSD3",
search_aliases=["sd3 prompt"], search_aliases=["sd3 prompt"],
category="advanced/conditioning", display_name="CLIP Text Encode (SD3)",
category="model/conditioning/stable diffusion",
inputs=[ inputs=[
io.Clip.Input("clip"), io.Clip.Input("clip"),
io.String.Input("clip_l", multiline=True, dynamic_prompts=True), io.String.Input("clip_l", multiline=True, dynamic_prompts=True),

View File

@ -96,8 +96,12 @@ class KeypointDraw:
# Body connections - matching DWPose limbSeq (1-indexed, converted to 0-indexed) # Body connections - matching DWPose limbSeq (1-indexed, converted to 0-indexed)
self.body_limbSeq = [ self.body_limbSeq = [
[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], [2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10],
[10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17], [10, 11], [2, 12], [12, 13], [13, 14]
[1, 16], [16, 18] ]
# Head connections (1-indexed, converted to 0-indexed)
self.head_edges = [
[2, 1], [1, 15], [15, 17], [1, 16], [16, 18]
] ]
# Colors matching DWPose # Colors matching DWPose
@ -215,7 +219,7 @@ class KeypointDraw:
return unique_pts if len(unique_pts) > 1 else [[center[0], center[1]], [center[0], center[1]]] return unique_pts if len(unique_pts) > 1 else [[center[0], center[1]], [center[0], center[1]]]
def draw_wholebody_keypoints(self, canvas, keypoints, scores=None, threshold=0.3, def draw_wholebody_keypoints(self, canvas, keypoints, scores=None, threshold=0.3,
draw_body=True, draw_feet=True, draw_face=True, draw_hands=True, stick_width=4, face_point_size=3): draw_body=True, draw_head=True, draw_feet=True, draw_face=True, draw_hands=True, stick_width=4, face_point_size=3):
""" """
Draw wholebody keypoints (134 keypoints after processing) in DWPose style. Draw wholebody keypoints (134 keypoints after processing) in DWPose style.
@ -237,9 +241,17 @@ class KeypointDraw:
""" """
H, W, C = canvas.shape H, W, C = canvas.shape
# Draw body limbs # Draw body limbs & head connections
if draw_body and len(keypoints) >= 18: if (draw_body or draw_head) and len(keypoints) >= 18:
for i, limb in enumerate(self.body_limbSeq): colorIndexOffset = 0
edges = []
if draw_body:
edges += self.body_limbSeq
else:
colorIndexOffset += len(self.body_limbSeq)
if draw_head:
edges += self.head_edges
for i, limb in enumerate(edges):
# Convert from 1-indexed to 0-indexed # Convert from 1-indexed to 0-indexed
idx1, idx2 = limb[0] - 1, limb[1] - 1 idx1, idx2 = limb[0] - 1, limb[1] - 1
@ -262,11 +274,17 @@ class KeypointDraw:
polygon = self.draw.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stick_width), int(angle), 0, 360, 1) polygon = self.draw.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stick_width), int(angle), 0, 360, 1)
self.draw.fillConvexPoly(canvas, polygon, self.colors[i % len(self.colors)]) self.draw.fillConvexPoly(canvas, polygon, self.colors[(i + colorIndexOffset) % len(self.colors)])
# Draw body keypoints # Draw body & head keypoints
if draw_body and len(keypoints) >= 18: if (draw_body or draw_head) and len(keypoints) >= 18:
head_keypoints = {0, 14, 15, 16, 17} # nose, eyes, ears
neck_point = 1
for i in range(18): for i in range(18):
if not draw_head and i in head_keypoints:
continue
if not draw_body and i not in head_keypoints and i != neck_point:
continue
if scores is not None and scores[i] < threshold: if scores is not None and scores[i] < threshold:
continue continue
x, y = int(keypoints[i][0]), int(keypoints[i][1]) x, y = int(keypoints[i][0]), int(keypoints[i][1])
@ -365,6 +383,7 @@ class SDPoseDrawKeypoints(io.ComfyNode):
io.Int.Input("stick_width", default=4, min=1, max=10, step=1), io.Int.Input("stick_width", default=4, min=1, max=10, step=1),
io.Int.Input("face_point_size", default=3, min=1, max=10, step=1), io.Int.Input("face_point_size", default=3, min=1, max=10, step=1),
io.Float.Input("score_threshold", default=0.3, min=0.0, max=1.0, step=0.01), io.Float.Input("score_threshold", default=0.3, min=0.0, max=1.0, step=0.01),
io.Boolean.Input("draw_head", default=True),
], ],
outputs=[ outputs=[
io.Image.Output(), io.Image.Output(),
@ -372,7 +391,7 @@ class SDPoseDrawKeypoints(io.ComfyNode):
) )
@classmethod @classmethod
def execute(cls, keypoints, draw_body, draw_hands, draw_face, draw_feet, stick_width, face_point_size, score_threshold) -> io.NodeOutput: def execute(cls, keypoints, draw_body, draw_hands, draw_face, draw_feet, stick_width, face_point_size, score_threshold, draw_head) -> io.NodeOutput:
if not keypoints: if not keypoints:
return io.NodeOutput(torch.zeros((1, 64, 64, 3), dtype=torch.float32)) return io.NodeOutput(torch.zeros((1, 64, 64, 3), dtype=torch.float32))
height = keypoints[0]["canvas_height"] height = keypoints[0]["canvas_height"]
@ -405,7 +424,7 @@ class SDPoseDrawKeypoints(io.ComfyNode):
canvas = drawer.draw_wholebody_keypoints( canvas = drawer.draw_wholebody_keypoints(
canvas, kp, sc, canvas, kp, sc,
threshold=score_threshold, threshold=score_threshold,
draw_body=draw_body, draw_feet=draw_feet, draw_body=draw_body, draw_head=draw_head, draw_feet=draw_feet,
draw_face=draw_face, draw_hands=draw_hands, draw_face=draw_face, draw_hands=draw_hands,
stick_width=stick_width, face_point_size=face_point_size, stick_width=stick_width, face_point_size=face_point_size,
) )

View File

@ -9,7 +9,7 @@ class SD_4XUpscale_Conditioning(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="SD_4XUpscale_Conditioning", node_id="SD_4XUpscale_Conditioning",
category="model/conditioning/upscale_diffusion", category="model/conditioning/stable diffusion upscaler",
inputs=[ inputs=[
io.Image.Input("images"), io.Image.Input("images"),
io.Conditioning.Input("positive"), io.Conditioning.Input("positive"),

View File

@ -27,7 +27,7 @@ class StableZero123_Conditioning(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="StableZero123_Conditioning", node_id="StableZero123_Conditioning",
category="model/conditioning/3d_models", category="model/conditioning/stable zero123",
inputs=[ inputs=[
io.ClipVision.Input("clip_vision"), io.ClipVision.Input("clip_vision"),
io.Image.Input("init_image"), io.Image.Input("init_image"),
@ -65,7 +65,7 @@ class StableZero123_Conditioning_Batched(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="StableZero123_Conditioning_Batched", node_id="StableZero123_Conditioning_Batched",
category="model/conditioning/3d_models", category="model/conditioning/stable zero123",
inputs=[ inputs=[
io.ClipVision.Input("clip_vision"), io.ClipVision.Input("clip_vision"),
io.Image.Input("init_image"), io.Image.Input("init_image"),
@ -112,7 +112,7 @@ class SV3D_Conditioning(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="SV3D_Conditioning", node_id="SV3D_Conditioning",
category="model/conditioning/3d_models", category="model/conditioning/stable video 3d",
inputs=[ inputs=[
io.ClipVision.Input("clip_vision"), io.ClipVision.Input("clip_vision"),
io.Image.Input("init_image"), io.Image.Input("init_image"),

View File

@ -29,7 +29,7 @@ class StableCascade_EmptyLatentImage(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="StableCascade_EmptyLatentImage", node_id="StableCascade_EmptyLatentImage",
category="model/latent/stable_cascade", category="model/latent/stable cascade",
inputs=[ inputs=[
io.Int.Input("width", default=1024, min=256, max=nodes.MAX_RESOLUTION, step=8), io.Int.Input("width", default=1024, min=256, max=nodes.MAX_RESOLUTION, step=8),
io.Int.Input("height", default=1024, min=256, max=nodes.MAX_RESOLUTION, step=8), io.Int.Input("height", default=1024, min=256, max=nodes.MAX_RESOLUTION, step=8),
@ -58,7 +58,7 @@ class StableCascade_StageC_VAEEncode(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="StableCascade_StageC_VAEEncode", node_id="StableCascade_StageC_VAEEncode",
category="model/latent/stable_cascade", category="model/latent/stable cascade",
inputs=[ inputs=[
io.Image.Input("image"), io.Image.Input("image"),
io.Vae.Input("vae"), io.Vae.Input("vae"),
@ -93,7 +93,7 @@ class StableCascade_StageB_Conditioning(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="StableCascade_StageB_Conditioning", node_id="StableCascade_StageB_Conditioning",
category="model/conditioning/stable_cascade", category="model/conditioning/stable cascade",
inputs=[ inputs=[
io.Conditioning.Input("conditioning"), io.Conditioning.Input("conditioning"),
io.Latent.Input("stage_c"), io.Latent.Input("stage_c"),

View File

@ -35,7 +35,7 @@ class TextGenerate(io.ComfyNode):
io.Image.Input("image", optional=True), io.Image.Input("image", optional=True),
io.Image.Input("video", optional=True, tooltip="Video frames as image batch. Assumed to be 24 FPS; subsampled to 1 FPS internally."), io.Image.Input("video", optional=True, tooltip="Video frames as image batch. Assumed to be 24 FPS; subsampled to 1 FPS internally."),
io.Audio.Input("audio", optional=True), io.Audio.Input("audio", optional=True),
io.Int.Input("max_length", default=256, min=1, max=2048), io.Int.Input("max_length", default=512, min=1, max=32768),
io.DynamicCombo.Input("sampling_mode", options=sampling_options, display_name="Sampling Mode"), io.DynamicCombo.Input("sampling_mode", options=sampling_options, display_name="Sampling Mode"),
io.Boolean.Input("thinking", optional=True, default=False, tooltip="Operate in thinking mode if the model supports it."), io.Boolean.Input("thinking", optional=True, default=False, tooltip="Operate in thinking mode if the model supports it."),
io.Boolean.Input("use_default_template", optional=True, default=True, tooltip="Use the built in system prompt/template if the model has one.", advanced=True), io.Boolean.Input("use_default_template", optional=True, default=True, tooltip="Use the built in system prompt/template if the model has one.", advanced=True),

View File

@ -1367,7 +1367,7 @@ class SaveLoRA(io.ComfyNode):
node_id="SaveLoRA", node_id="SaveLoRA",
search_aliases=["export lora"], search_aliases=["export lora"],
display_name="Save LoRA Weights", display_name="Save LoRA Weights",
category="advanced/model_merging", category="model/merging",
is_experimental=True, is_experimental=True,
is_output_node=True, is_output_node=True,
inputs=[ inputs=[

View File

@ -65,7 +65,7 @@ class TripoSplatPreprocessImage(IO.ComfyNode):
return IO.Schema( return IO.Schema(
node_id="TripoSplatPreprocessImage", node_id="TripoSplatPreprocessImage",
display_name="TripoSplat Preprocess Image", display_name="TripoSplat Preprocess Image",
category="3d/conditioning", category="model/conditioning/triposplat",
description="Crop center each image to a square canvas on a black background and add padding.", description="Crop center each image to a square canvas on a black background and add padding.",
inputs=[ inputs=[
IO.Image.Input("image"), IO.Image.Input("image"),
@ -95,7 +95,7 @@ class TripoSplatConditioning(IO.ComfyNode):
return IO.Schema( return IO.Schema(
node_id="TripoSplatConditioning", node_id="TripoSplatConditioning",
display_name="TripoSplat Conditioning", display_name="TripoSplat Conditioning",
category="3d/conditioning", category="model/conditioning/triposplat",
description="Encode the image with DINOv3 and the Flux2 VAE into TripoSplat positive/negative " description="Encode the image with DINOv3 and the Flux2 VAE into TripoSplat positive/negative "
"conditioning, and create the fixed size noise target (latent + camera) for the KSampler", "conditioning, and create the fixed size noise target (latent + camera) for the KSampler",
inputs=[ inputs=[

View File

@ -41,7 +41,7 @@ class SVD_img2vid_Conditioning:
FUNCTION = "encode" FUNCTION = "encode"
CATEGORY = "model/conditioning/video_models" CATEGORY = "model/conditioning/stable video"
def encode(self, clip_vision, init_image, vae, width, height, video_frames, motion_bucket_id, fps, augmentation_level): def encode(self, clip_vision, init_image, vae, width, height, video_frames, motion_bucket_id, fps, augmentation_level):
output = clip_vision.encode_image(init_image) output = clip_vision.encode_image(init_image)
@ -108,7 +108,7 @@ class VideoTriangleCFGGuidance:
return (m, ) return (m, )
class ImageOnlyCheckpointSave(comfy_extras.nodes_model_merging.CheckpointSave): class ImageOnlyCheckpointSave(comfy_extras.nodes_model_merging.CheckpointSave):
CATEGORY = "advanced/model_merging" CATEGORY = "model/merging"
@classmethod @classmethod
def INPUT_TYPES(s): def INPUT_TYPES(s):
@ -138,7 +138,7 @@ class ConditioningSetAreaPercentageVideo:
RETURN_TYPES = ("CONDITIONING",) RETURN_TYPES = ("CONDITIONING",)
FUNCTION = "append" FUNCTION = "append"
CATEGORY = "model/conditioning" CATEGORY = "model/conditioning/transform"
def append(self, conditioning, width, height, temporal, x, y, z, strength): def append(self, conditioning, width, height, temporal, x, y, z, strength):
c = node_helpers.conditioning_set_values(conditioning, {"area": ("percentage", temporal, height, width, z, y, x), c = node_helpers.conditioning_set_values(conditioning, {"area": ("percentage", temporal, height, width, z, y, x),
@ -160,4 +160,5 @@ NODE_DISPLAY_NAME_MAPPINGS = {
"ImageOnlyCheckpointLoader": "Load Checkpoint Image Only (img2vid model)", "ImageOnlyCheckpointLoader": "Load Checkpoint Image Only (img2vid model)",
"VideoLinearCFGGuidance": "Video Linear CFG Guidance", "VideoLinearCFGGuidance": "Video Linear CFG Guidance",
"VideoTriangleCFGGuidance": "Video Triangle CFG Guidance", "VideoTriangleCFGGuidance": "Video Triangle CFG Guidance",
"ConditioningSetAreaPercentageVideo": "Conditioning (Set Area with Percentage for Video)",
} }

View File

@ -175,7 +175,7 @@ class VOIDInpaintConditioning(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="VOIDInpaintConditioning", node_id="VOIDInpaintConditioning",
category="model/conditioning/video_models", category="model/conditioning/void",
inputs=[ inputs=[
io.Conditioning.Input("positive"), io.Conditioning.Input("positive"),
io.Conditioning.Input("negative"), io.Conditioning.Input("negative"),
@ -288,7 +288,7 @@ class VOIDWarpedNoise(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="VOIDWarpedNoise", node_id="VOIDWarpedNoise",
category="model/latent/video", category="model/latent/void",
inputs=[ inputs=[
OpticalFlow.Input( OpticalFlow.Input(
"optical_flow", "optical_flow",
@ -393,7 +393,7 @@ class VOIDWarpedNoiseSource(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="VOIDWarpedNoiseSource", node_id="VOIDWarpedNoiseSource",
category="model/sampling/noise", category="model/latent/void",
inputs=[ inputs=[
io.Latent.Input("warped_noise", io.Latent.Input("warped_noise",
tooltip="Warped noise latent from VOIDWarpedNoise"), tooltip="Warped noise latent from VOIDWarpedNoise"),

View File

@ -18,7 +18,7 @@ class WanImageToVideo(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="WanImageToVideo", node_id="WanImageToVideo",
category="model/conditioning/video_models", category="model/conditioning/wan",
inputs=[ inputs=[
io.Conditioning.Input("positive"), io.Conditioning.Input("positive"),
io.Conditioning.Input("negative"), io.Conditioning.Input("negative"),
@ -66,7 +66,7 @@ class WanFunControlToVideo(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="WanFunControlToVideo", node_id="WanFunControlToVideo",
category="model/conditioning/video_models", category="model/conditioning/wan/fun control",
inputs=[ inputs=[
io.Conditioning.Input("positive"), io.Conditioning.Input("positive"),
io.Conditioning.Input("negative"), io.Conditioning.Input("negative"),
@ -119,7 +119,7 @@ class Wan22FunControlToVideo(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="Wan22FunControlToVideo", node_id="Wan22FunControlToVideo",
category="model/conditioning/video_models", category="model/conditioning/wan/fun control",
inputs=[ inputs=[
io.Conditioning.Input("positive"), io.Conditioning.Input("positive"),
io.Conditioning.Input("negative"), io.Conditioning.Input("negative"),
@ -184,7 +184,7 @@ class WanFirstLastFrameToVideo(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="WanFirstLastFrameToVideo", node_id="WanFirstLastFrameToVideo",
category="model/conditioning/video_models", category="model/conditioning/wan",
inputs=[ inputs=[
io.Conditioning.Input("positive"), io.Conditioning.Input("positive"),
io.Conditioning.Input("negative"), io.Conditioning.Input("negative"),
@ -256,7 +256,7 @@ class WanFunInpaintToVideo(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="WanFunInpaintToVideo", node_id="WanFunInpaintToVideo",
category="model/conditioning/video_models", category="model/conditioning/wan/fun inpaint",
inputs=[ inputs=[
io.Conditioning.Input("positive"), io.Conditioning.Input("positive"),
io.Conditioning.Input("negative"), io.Conditioning.Input("negative"),
@ -288,7 +288,7 @@ class WanVaceToVideo(io.ComfyNode):
return io.Schema( return io.Schema(
node_id="WanVaceToVideo", node_id="WanVaceToVideo",
search_aliases=["video conditioning", "video control"], search_aliases=["video conditioning", "video control"],
category="model/conditioning/video_models", category="model/conditioning/wan/vace",
inputs=[ inputs=[
io.Conditioning.Input("positive"), io.Conditioning.Input("positive"),
io.Conditioning.Input("negative"), io.Conditioning.Input("negative"),
@ -375,7 +375,8 @@ class TrimVideoLatent(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="TrimVideoLatent", node_id="TrimVideoLatent",
category="model/latent/video", display_name="Trim Video Latent",
category="model/latent",
inputs=[ inputs=[
io.Latent.Input("samples"), io.Latent.Input("samples"),
io.Int.Input("trim_amount", default=0, min=0, max=99999), io.Int.Input("trim_amount", default=0, min=0, max=99999),
@ -398,7 +399,7 @@ class WanCameraImageToVideo(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="WanCameraImageToVideo", node_id="WanCameraImageToVideo",
category="model/conditioning/video_models", category="model/conditioning/wan/camera",
inputs=[ inputs=[
io.Conditioning.Input("positive"), io.Conditioning.Input("positive"),
io.Conditioning.Input("negative"), io.Conditioning.Input("negative"),
@ -452,7 +453,7 @@ class WanPhantomSubjectToVideo(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="WanPhantomSubjectToVideo", node_id="WanPhantomSubjectToVideo",
category="model/conditioning/video_models", category="model/conditioning/wan/phantom subject",
inputs=[ inputs=[
io.Conditioning.Input("positive"), io.Conditioning.Input("positive"),
io.Conditioning.Input("negative"), io.Conditioning.Input("negative"),
@ -707,7 +708,7 @@ class WanTrackToVideo(io.ComfyNode):
return io.Schema( return io.Schema(
node_id="WanTrackToVideo", node_id="WanTrackToVideo",
search_aliases=["motion tracking", "trajectory video", "point tracking", "keypoint animation"], search_aliases=["motion tracking", "trajectory video", "point tracking", "keypoint animation"],
category="model/conditioning/video_models", category="model/conditioning/wan/move",
inputs=[ inputs=[
io.Conditioning.Input("positive"), io.Conditioning.Input("positive"),
io.Conditioning.Input("negative"), io.Conditioning.Input("negative"),
@ -951,7 +952,7 @@ class WanSoundImageToVideo(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="WanSoundImageToVideo", node_id="WanSoundImageToVideo",
category="model/conditioning/video_models", category="model/conditioning/wan/sound",
inputs=[ inputs=[
io.Conditioning.Input("positive"), io.Conditioning.Input("positive"),
io.Conditioning.Input("negative"), io.Conditioning.Input("negative"),
@ -984,7 +985,7 @@ class WanSoundImageToVideoExtend(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="WanSoundImageToVideoExtend", node_id="WanSoundImageToVideoExtend",
category="model/conditioning/video_models", category="model/conditioning/wan/sound",
inputs=[ inputs=[
io.Conditioning.Input("positive"), io.Conditioning.Input("positive"),
io.Conditioning.Input("negative"), io.Conditioning.Input("negative"),
@ -1046,7 +1047,7 @@ class WanHuMoImageToVideo(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="WanHuMoImageToVideo", node_id="WanHuMoImageToVideo",
category="model/conditioning/video_models", category="model/conditioning/wan/humo",
inputs=[ inputs=[
io.Conditioning.Input("positive"), io.Conditioning.Input("positive"),
io.Conditioning.Input("negative"), io.Conditioning.Input("negative"),
@ -1112,7 +1113,7 @@ class WanAnimateToVideo(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="WanAnimateToVideo", node_id="WanAnimateToVideo",
category="model/conditioning/video_models", category="model/conditioning/wan/animate",
inputs=[ inputs=[
io.Conditioning.Input("positive"), io.Conditioning.Input("positive"),
io.Conditioning.Input("negative"), io.Conditioning.Input("negative"),
@ -1252,7 +1253,7 @@ class Wan22ImageToVideoLatent(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="Wan22ImageToVideoLatent", node_id="Wan22ImageToVideoLatent",
category="model/conditioning/inpaint", category="model/conditioning/wan",
inputs=[ inputs=[
io.Vae.Input("vae"), io.Vae.Input("vae"),
io.Int.Input("width", default=1280, min=32, max=nodes.MAX_RESOLUTION, step=32), io.Int.Input("width", default=1280, min=32, max=nodes.MAX_RESOLUTION, step=32),
@ -1302,7 +1303,7 @@ class WanInfiniteTalkToVideo(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="WanInfiniteTalkToVideo", node_id="WanInfiniteTalkToVideo",
category="model/conditioning/video_models", category="model/conditioning/wan/infinite talk",
inputs=[ inputs=[
io.DynamicCombo.Input("mode", options=[ io.DynamicCombo.Input("mode", options=[
io.DynamicCombo.Option("single_speaker", []), io.DynamicCombo.Option("single_speaker", []),

View File

@ -713,7 +713,7 @@ class WanDancerEncodeAudio(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="WanDancerEncodeAudio", node_id="WanDancerEncodeAudio",
category="model/conditioning/video_models", category="model/conditioning/wan/dancer",
inputs=[ inputs=[
io.Audio.Input("audio"), io.Audio.Input("audio"),
io.Int.Input("video_frames", default=149, min=1, max=nodes.MAX_RESOLUTION, step=4), io.Int.Input("video_frames", default=149, min=1, max=nodes.MAX_RESOLUTION, step=4),
@ -787,7 +787,7 @@ class WanDancerVideo(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="WanDancerVideo", node_id="WanDancerVideo",
category="model/conditioning/video_models", category="model/conditioning/wan/dancer",
inputs=[ inputs=[
io.Conditioning.Input("positive"), io.Conditioning.Input("positive"),
io.Conditioning.Input("negative"), io.Conditioning.Input("negative"),

View File

@ -247,7 +247,7 @@ class WanMoveVisualizeTracks(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="WanMoveVisualizeTracks", node_id="WanMoveVisualizeTracks",
category="model/conditioning/video_models", category="model/conditioning/wan/move",
inputs=[ inputs=[
io.Image.Input("images"), io.Image.Input("images"),
io.Tracks.Input("tracks", optional=True), io.Tracks.Input("tracks", optional=True),
@ -283,7 +283,7 @@ class WanMoveTracksFromCoords(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="WanMoveTracksFromCoords", node_id="WanMoveTracksFromCoords",
category="model/conditioning/video_models", category="model/conditioning/wan/move",
inputs=[ inputs=[
io.String.Input("track_coords", force_input=True, default="[]", optional=True), io.String.Input("track_coords", force_input=True, default="[]", optional=True),
io.Mask.Input("track_mask", optional=True), io.Mask.Input("track_mask", optional=True),
@ -325,7 +325,8 @@ class GenerateTracks(io.ComfyNode):
return io.Schema( return io.Schema(
node_id="GenerateTracks", node_id="GenerateTracks",
search_aliases=["motion paths", "camera movement", "trajectory"], search_aliases=["motion paths", "camera movement", "trajectory"],
category="model/conditioning/video_models", display_name="Generate Video Tracks",
category="model/conditioning/wan/move",
inputs=[ inputs=[
io.Int.Input("width", default=832, min=16, max=4096, step=16), io.Int.Input("width", default=832, min=16, max=4096, step=16),
io.Int.Input("height", default=480, min=16, max=4096, step=16), io.Int.Input("height", default=480, min=16, max=4096, step=16),
@ -434,7 +435,7 @@ class WanMoveConcatTrack(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="WanMoveConcatTrack", node_id="WanMoveConcatTrack",
category="model/conditioning/video_models", category="model/conditioning/wan/move",
inputs=[ inputs=[
io.Tracks.Input("tracks_1"), io.Tracks.Input("tracks_1"),
io.Tracks.Input("tracks_2", optional=True), io.Tracks.Input("tracks_2", optional=True),
@ -463,7 +464,7 @@ class WanMoveTrackToVideo(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="WanMoveTrackToVideo", node_id="WanMoveTrackToVideo",
category="model/conditioning/video_models", category="model/conditioning/wan/move",
inputs=[ inputs=[
io.Conditioning.Input("positive"), io.Conditioning.Input("positive"),
io.Conditioning.Input("negative"), io.Conditioning.Input("negative"),

View File

@ -10,7 +10,7 @@ class TextEncodeZImageOmni(io.ComfyNode):
def define_schema(cls): def define_schema(cls):
return io.Schema( return io.Schema(
node_id="TextEncodeZImageOmni", node_id="TextEncodeZImageOmni",
category="advanced/conditioning", category="model/conditioning/z-image",
is_experimental=True, is_experimental=True,
inputs=[ inputs=[
io.Clip.Input("clip"), io.Clip.Input("clip"),

View File

@ -1,3 +1,3 @@
# This file is automatically generated by the build process when version is # This file is automatically generated by the build process when version is
# updated in pyproject.toml. # updated in pyproject.toml.
__version__ = "0.24.0" __version__ = "0.25.0"

49
main.py
View File

@ -55,7 +55,11 @@ if __name__ == "__main__" and args.debug_hang:
import comfy_aimdo.control import comfy_aimdo.control
if enables_dynamic_vram(): if enables_dynamic_vram():
comfy_aimdo.control.init(simple_vram_headroom=None if args.reserve_vram is None else int(args.reserve_vram * 1024 ** 3)) try:
comfy_aimdo.control.init(simple_vram_headroom=None if args.reserve_vram is None else int(args.reserve_vram * 1024 ** 3))
except TypeError:
# comfy-aimdo 0.4.9 protocol.
comfy_aimdo.control.init()
if os.name == "nt": if os.name == "nt":
os.environ['MIMALLOC_PURGE_DELAY'] = '0' os.environ['MIMALLOC_PURGE_DELAY'] = '0'
@ -123,6 +127,10 @@ def apply_custom_paths():
for config_path in itertools.chain(*args.extra_model_paths_config): for config_path in itertools.chain(*args.extra_model_paths_config):
utils.extra_config.load_extra_path_config(config_path) utils.extra_config.load_extra_path_config(config_path)
# --base-directory
if args.base_directory:
logging.info(f"Setting base directory to: {folder_paths.base_path}")
# --output-directory, --input-directory, --user-directory # --output-directory, --input-directory, --user-directory
if args.output_directory: if args.output_directory:
output_dir = os.path.abspath(args.output_directory) output_dir = os.path.abspath(args.output_directory)
@ -231,23 +239,30 @@ import comfy.model_patcher
if args.enable_dynamic_vram or (enables_dynamic_vram() and comfy.model_management.is_nvidia() and not comfy.model_management.is_wsl()): if args.enable_dynamic_vram or (enables_dynamic_vram() and comfy.model_management.is_nvidia() and not comfy.model_management.is_wsl()):
if (not args.enable_dynamic_vram) and (comfy.model_management.torch_version_numeric < (2, 8)): if (not args.enable_dynamic_vram) and (comfy.model_management.torch_version_numeric < (2, 8)):
logging.warning("Unsupported Pytorch detected. DynamicVRAM support requires Pytorch version 2.8 or later. Falling back to legacy ModelPatcher. VRAM estimates may be unreliable especially on Windows") logging.warning("Unsupported Pytorch detected. DynamicVRAM support requires Pytorch version 2.8 or later. Falling back to legacy ModelPatcher. VRAM estimates may be unreliable especially on Windows")
elif comfy_aimdo.control.init_devices((d.index, int(args.vram_headroom * 1024 ** 3)) for d in comfy.model_management.get_all_torch_devices()):
if args.verbose == 'DEBUG':
comfy_aimdo.control.set_log_debug()
elif args.verbose == 'CRITICAL':
comfy_aimdo.control.set_log_critical()
elif args.verbose == 'ERROR':
comfy_aimdo.control.set_log_error()
elif args.verbose == 'WARNING':
comfy_aimdo.control.set_log_warning()
else: #INFO
comfy_aimdo.control.set_log_info()
comfy.model_patcher.CoreModelPatcher = comfy.model_patcher.ModelPatcherDynamic
comfy.memory_management.aimdo_enabled = True
logging.info("DynamicVRAM support detected and enabled")
else: else:
logging.warning("No working comfy-aimdo install detected. DynamicVRAM support disabled. Falling back to legacy ModelPatcher. VRAM estimates may be unreliable especially on Windows") try:
aimdo_initialized = comfy_aimdo.control.init_devices((d.index, int(args.vram_headroom * 1024 ** 3)) for d in comfy.model_management.get_all_torch_devices())
except TypeError:
# comfy-aimdo 0.4.9 protocol.
aimdo_initialized = comfy_aimdo.control.init_devices(d.index for d in comfy.model_management.get_all_torch_devices())
if aimdo_initialized:
if args.verbose == 'DEBUG':
comfy_aimdo.control.set_log_debug()
elif args.verbose == 'CRITICAL':
comfy_aimdo.control.set_log_critical()
elif args.verbose == 'ERROR':
comfy_aimdo.control.set_log_error()
elif args.verbose == 'WARNING':
comfy_aimdo.control.set_log_warning()
else: #INFO
comfy_aimdo.control.set_log_info()
comfy.model_patcher.CoreModelPatcher = comfy.model_patcher.ModelPatcherDynamic
comfy.memory_management.aimdo_enabled = True
logging.info("DynamicVRAM support detected and enabled")
else:
logging.warning("No working comfy-aimdo install detected. DynamicVRAM support disabled. Falling back to legacy ModelPatcher. VRAM estimates may be unreliable especially on Windows")
def cuda_malloc_warning(): def cuda_malloc_warning():

View File

@ -87,7 +87,7 @@ class ConditioningCombine:
RETURN_TYPES = ("CONDITIONING",) RETURN_TYPES = ("CONDITIONING",)
FUNCTION = "combine" FUNCTION = "combine"
CATEGORY = "model/conditioning" CATEGORY = "model/conditioning/transform"
SEARCH_ALIASES = ["combine", "merge conditioning", "combine prompts", "merge prompts", "mix prompts", "add prompt"] SEARCH_ALIASES = ["combine", "merge conditioning", "combine prompts", "merge prompts", "mix prompts", "add prompt"]
def combine(self, conditioning_1, conditioning_2): def combine(self, conditioning_1, conditioning_2):
@ -104,7 +104,7 @@ class ConditioningAverage :
RETURN_TYPES = ("CONDITIONING",) RETURN_TYPES = ("CONDITIONING",)
FUNCTION = "addWeighted" FUNCTION = "addWeighted"
CATEGORY = "model/conditioning" CATEGORY = "model/conditioning/transform"
def addWeighted(self, conditioning_to, conditioning_from, conditioning_to_strength): def addWeighted(self, conditioning_to, conditioning_from, conditioning_to_strength):
out = [] out = []
@ -143,7 +143,7 @@ class ConditioningConcat:
RETURN_TYPES = ("CONDITIONING",) RETURN_TYPES = ("CONDITIONING",)
FUNCTION = "concat" FUNCTION = "concat"
CATEGORY = "model/conditioning" CATEGORY = "model/conditioning/transform"
def concat(self, conditioning_to, conditioning_from): def concat(self, conditioning_to, conditioning_from):
out = [] out = []
@ -176,7 +176,7 @@ class ConditioningSetArea:
RETURN_TYPES = ("CONDITIONING",) RETURN_TYPES = ("CONDITIONING",)
FUNCTION = "append" FUNCTION = "append"
CATEGORY = "model/conditioning" CATEGORY = "model/conditioning/transform"
def append(self, conditioning, width, height, x, y, strength): def append(self, conditioning, width, height, x, y, strength):
c = node_helpers.conditioning_set_values(conditioning, {"area": (height // 8, width // 8, y // 8, x // 8), c = node_helpers.conditioning_set_values(conditioning, {"area": (height // 8, width // 8, y // 8, x // 8),
@ -197,7 +197,7 @@ class ConditioningSetAreaPercentage:
RETURN_TYPES = ("CONDITIONING",) RETURN_TYPES = ("CONDITIONING",)
FUNCTION = "append" FUNCTION = "append"
CATEGORY = "model/conditioning" CATEGORY = "model/conditioning/transform"
def append(self, conditioning, width, height, x, y, strength): def append(self, conditioning, width, height, x, y, strength):
c = node_helpers.conditioning_set_values(conditioning, {"area": ("percentage", height, width, y, x), c = node_helpers.conditioning_set_values(conditioning, {"area": ("percentage", height, width, y, x),
@ -214,7 +214,7 @@ class ConditioningSetAreaStrength:
RETURN_TYPES = ("CONDITIONING",) RETURN_TYPES = ("CONDITIONING",)
FUNCTION = "append" FUNCTION = "append"
CATEGORY = "model/conditioning" CATEGORY = "model/conditioning/transform"
def append(self, conditioning, strength): def append(self, conditioning, strength):
c = node_helpers.conditioning_set_values(conditioning, {"strength": strength}) c = node_helpers.conditioning_set_values(conditioning, {"strength": strength})
@ -234,7 +234,7 @@ class ConditioningSetMask:
RETURN_TYPES = ("CONDITIONING",) RETURN_TYPES = ("CONDITIONING",)
FUNCTION = "append" FUNCTION = "append"
CATEGORY = "model/conditioning" CATEGORY = "model/conditioning/transform"
def append(self, conditioning, mask, set_cond_area, strength): def append(self, conditioning, mask, set_cond_area, strength):
set_area_to_bounds = False set_area_to_bounds = False
@ -257,7 +257,7 @@ class ConditioningZeroOut:
RETURN_TYPES = ("CONDITIONING",) RETURN_TYPES = ("CONDITIONING",)
FUNCTION = "zero_out" FUNCTION = "zero_out"
CATEGORY = "advanced/conditioning" CATEGORY = "model/conditioning/transform"
def zero_out(self, conditioning): def zero_out(self, conditioning):
c = [] c = []
@ -283,11 +283,10 @@ class ConditioningSetTimestepRange:
RETURN_TYPES = ("CONDITIONING",) RETURN_TYPES = ("CONDITIONING",)
FUNCTION = "set_range" FUNCTION = "set_range"
CATEGORY = "advanced/conditioning" CATEGORY = "model/conditioning/transform"
def set_range(self, conditioning, start, end): def set_range(self, conditioning, start, end):
c = node_helpers.conditioning_set_values(conditioning, {"start_percent": start, c = node_helpers.conditioning_set_values(conditioning, {"start_percent": start, "end_percent": end})
"end_percent": end})
return (c, ) return (c, )
class VAEDecode: class VAEDecode:
@ -389,7 +388,7 @@ class VAEEncodeForInpaint:
RETURN_TYPES = ("LATENT",) RETURN_TYPES = ("LATENT",)
FUNCTION = "encode" FUNCTION = "encode"
CATEGORY = "model/latent/inpaint" CATEGORY = "model/latent"
def encode(self, vae, pixels, mask, grow_mask_by=6): def encode(self, vae, pixels, mask, grow_mask_by=6):
downscale_ratio = vae.spacial_compression_encode() downscale_ratio = vae.spacial_compression_encode()
@ -438,7 +437,7 @@ class InpaintModelConditioning:
RETURN_NAMES = ("positive", "negative", "latent") RETURN_NAMES = ("positive", "negative", "latent")
FUNCTION = "encode" FUNCTION = "encode"
CATEGORY = "model/conditioning/inpaint" CATEGORY = "model/conditioning"
def encode(self, positive, negative, pixels, vae, mask, noise_mask=True): def encode(self, positive, negative, pixels, vae, mask, noise_mask=True):
x = (pixels.shape[1] // 8) * 8 x = (pixels.shape[1] // 8) * 8
@ -576,7 +575,7 @@ class CheckpointLoader:
RETURN_TYPES = ("MODEL", "CLIP", "VAE") RETURN_TYPES = ("MODEL", "CLIP", "VAE")
FUNCTION = "load_checkpoint" FUNCTION = "load_checkpoint"
CATEGORY = "advanced/loaders" CATEGORY = "model/loaders"
DEPRECATED = True DEPRECATED = True
def load_checkpoint(self, config_name, ckpt_name): def load_checkpoint(self, config_name, ckpt_name):
@ -622,8 +621,9 @@ class DiffusersLoader:
return {"required": {"model_path": (paths,), }} return {"required": {"model_path": (paths,), }}
RETURN_TYPES = ("MODEL", "CLIP", "VAE") RETURN_TYPES = ("MODEL", "CLIP", "VAE")
FUNCTION = "load_checkpoint" FUNCTION = "load_checkpoint"
DEPRECATED = True
CATEGORY = "advanced/loaders/deprecated" CATEGORY = "model/loaders"
def load_checkpoint(self, model_path, output_vae=True, output_clip=True): def load_checkpoint(self, model_path, output_vae=True, output_clip=True):
for search_path in folder_paths.get_folder_paths("diffusers"): for search_path in folder_paths.get_folder_paths("diffusers"):
@ -949,7 +949,7 @@ class UNETLoader:
RETURN_TYPES = ("MODEL",) RETURN_TYPES = ("MODEL",)
FUNCTION = "load_unet" FUNCTION = "load_unet"
CATEGORY = "advanced/loaders" CATEGORY = "model/loaders"
def load_unet(self, unet_name, weight_dtype): def load_unet(self, unet_name, weight_dtype):
model_options = {} model_options = {}
@ -977,9 +977,9 @@ class CLIPLoader:
RETURN_TYPES = ("CLIP",) RETURN_TYPES = ("CLIP",)
FUNCTION = "load_clip" FUNCTION = "load_clip"
CATEGORY = "advanced/loaders" CATEGORY = "model/loaders"
DESCRIPTION = "[Recipes]\n\nstable_diffusion: clip-l\nstable_cascade: clip-g\nsd3: t5 xxl/ clip-g / clip-l\nstable_audio: t5 base\nmochi: t5 xxl\ncogvideox: t5 xxl (226-token padding)\ncosmos: old t5 xxl\nlumina2: gemma 2 2B\nwan: umt5 xxl\n hidream: llama-3.1 (Recommend) or t5\nomnigen2: qwen vl 2.5 3B\nlens: gpt-oss-20b\n pixeldit: gemma 2 2B elm" DESCRIPTION = "Recipes:\nsd: clip-l\nstable cascade: clip-g\nsd3: t5 xxl / clip-g / clip-l\nstable audio: t5 base\nmochi: t5 xxl\ncogvideox: t5 xxl (226-token padding)\ncosmos: old t5 xxl\nlumina2: gemma 2 2B\nwan: umt5 xxl\nhidream: llama-3.1 (Recommend) or t5\nomnigen2: qwen vl 2.5 3B\nlens: gpt-oss-20b\npixeldit: gemma 2 2B elm"
def load_clip(self, clip_name, type="stable_diffusion", device="default"): def load_clip(self, clip_name, type="stable_diffusion", device="default"):
clip_type = getattr(comfy.sd.CLIPType, type.upper(), comfy.sd.CLIPType.STABLE_DIFFUSION) clip_type = getattr(comfy.sd.CLIPType, type.upper(), comfy.sd.CLIPType.STABLE_DIFFUSION)
@ -1005,9 +1005,9 @@ class DualCLIPLoader:
RETURN_TYPES = ("CLIP",) RETURN_TYPES = ("CLIP",)
FUNCTION = "load_clip" FUNCTION = "load_clip"
CATEGORY = "advanced/loaders" CATEGORY = "model/loaders"
DESCRIPTION = "[Recipes]\n\nsdxl: clip-l, clip-g\nsd3: clip-l, clip-g / clip-l, t5 / clip-g, t5\nflux: clip-l, t5\nhidream: at least one of t5 or llama, recommended t5 and llama\nhunyuan_image: qwen2.5vl 7b and byt5 small\nnewbie: gemma-3-4b-it, jina clip v2" DESCRIPTION = "Recipes:\nsdxl: clip-l, clip-g\nsd3: clip-l, clip-g / clip-l, t5 / clip-g, t5\nflux: clip-l, t5\nhidream: at least one of t5 or llama, recommended t5 and llama\nhunyuan_image: qwen2.5vl 7b and byt5 small\nnewbie: gemma-3-4b-it, jina clip v2"
def load_clip(self, clip_name1, clip_name2, type, device="default"): def load_clip(self, clip_name1, clip_name2, type, device="default"):
clip_type = getattr(comfy.sd.CLIPType, type.upper(), comfy.sd.CLIPType.STABLE_DIFFUSION) clip_type = getattr(comfy.sd.CLIPType, type.upper(), comfy.sd.CLIPType.STABLE_DIFFUSION)
@ -1088,7 +1088,7 @@ class StyleModelApply:
RETURN_TYPES = ("CONDITIONING",) RETURN_TYPES = ("CONDITIONING",)
FUNCTION = "apply_stylemodel" FUNCTION = "apply_stylemodel"
CATEGORY = "model/conditioning/style_model" CATEGORY = "model/conditioning"
def apply_stylemodel(self, conditioning, style_model, clip_vision_output, strength, strength_type): def apply_stylemodel(self, conditioning, style_model, clip_vision_output, strength, strength_type):
cond = style_model.get_cond(clip_vision_output).flatten(start_dim=0, end_dim=1).unsqueeze(dim=0) cond = style_model.get_cond(clip_vision_output).flatten(start_dim=0, end_dim=1).unsqueeze(dim=0)
@ -1518,13 +1518,11 @@ class LatentCrop:
class SetLatentNoiseMask: class SetLatentNoiseMask:
@classmethod @classmethod
def INPUT_TYPES(s): def INPUT_TYPES(s):
return {"required": { "samples": ("LATENT",), return {"required": { "samples": ("LATENT",), "mask": ("MASK",), }}
"mask": ("MASK",),
}}
RETURN_TYPES = ("LATENT",) RETURN_TYPES = ("LATENT",)
FUNCTION = "set_mask" FUNCTION = "set_mask"
CATEGORY = "model/latent/inpaint" CATEGORY = "model/latent"
def set_mask(self, samples, mask): def set_mask(self, samples, mask):
s = samples.copy() s = samples.copy()
@ -2045,7 +2043,7 @@ NODE_CLASS_MAPPINGS = {
"ImageBatch": ImageBatch, "ImageBatch": ImageBatch,
"ImagePadForOutpaint": ImagePadForOutpaint, "ImagePadForOutpaint": ImagePadForOutpaint,
"EmptyImage": EmptyImage, "EmptyImage": EmptyImage,
"ConditioningAverage": ConditioningAverage , "ConditioningAverage": ConditioningAverage,
"ConditioningCombine": ConditioningCombine, "ConditioningCombine": ConditioningCombine,
"ConditioningConcat": ConditioningConcat, "ConditioningConcat": ConditioningConcat,
"ConditioningSetArea": ConditioningSetArea, "ConditioningSetArea": ConditioningSetArea,
@ -2101,6 +2099,7 @@ NODE_DISPLAY_NAME_MAPPINGS = {
"LoraLoader": "Load LoRA (Model and CLIP)", "LoraLoader": "Load LoRA (Model and CLIP)",
"LoraLoaderModelOnly": "Load LoRA", "LoraLoaderModelOnly": "Load LoRA",
"CLIPLoader": "Load CLIP", "CLIPLoader": "Load CLIP",
"DualCLIPLoader": "Load CLIP (Dual)",
"ControlNetLoader": "Load ControlNet Model", "ControlNetLoader": "Load ControlNet Model",
"DiffControlNetLoader": "Load ControlNet Model (diff)", "DiffControlNetLoader": "Load ControlNet Model (diff)",
"StyleModelLoader": "Load Style Model", "StyleModelLoader": "Load Style Model",
@ -2108,6 +2107,7 @@ NODE_DISPLAY_NAME_MAPPINGS = {
"UNETLoader": "Load Diffusion Model", "UNETLoader": "Load Diffusion Model",
"unCLIPCheckpointLoader": "Load unCLIP Checkpoint", "unCLIPCheckpointLoader": "Load unCLIP Checkpoint",
"GLIGENLoader": "Load GLIGEN Model", "GLIGENLoader": "Load GLIGEN Model",
"DiffusersLoader": "Load Diffusers Model (DEPRECATED)",
# Conditioning # Conditioning
"CLIPVisionEncode": "CLIP Vision Encode", "CLIPVisionEncode": "CLIP Vision Encode",
"StyleModelApply": "Apply Style Model", "StyleModelApply": "Apply Style Model",
@ -2115,12 +2115,16 @@ NODE_DISPLAY_NAME_MAPPINGS = {
"CLIPSetLastLayer": "CLIP Set Last Layer", "CLIPSetLastLayer": "CLIP Set Last Layer",
"ConditioningCombine": "Conditioning (Combine)", "ConditioningCombine": "Conditioning (Combine)",
"ConditioningAverage ": "Conditioning (Average)", "ConditioningAverage ": "Conditioning (Average)",
"ConditioningAverage": "Conditioning (Average)",
"ConditioningConcat": "Conditioning (Concat)", "ConditioningConcat": "Conditioning (Concat)",
"ConditioningSetArea": "Conditioning (Set Area)", "ConditioningSetArea": "Conditioning (Set Area)",
"ConditioningSetAreaPercentage": "Conditioning (Set Area with Percentage)", "ConditioningSetAreaPercentage": "Conditioning (Set Area with Percentage)",
"ConditioningSetAreaStrength": "Conditioning (Set Area Strength)",
"ConditioningSetMask": "Conditioning (Set Mask)", "ConditioningSetMask": "Conditioning (Set Mask)",
"ControlNetApply": "Apply ControlNet (DEPRECATED)", "ControlNetApply": "Apply ControlNet (DEPRECATED)",
"ControlNetApplyAdvanced": "Apply ControlNet", "ControlNetApplyAdvanced": "Apply ControlNet",
"GLIGENTextBoxApply": "Apply GLIGEN Text Box",
"ConditioningZeroOut": "Conditioning Zero Out",
# Latent # Latent
"VAEEncodeForInpaint": "VAE Encode (for Inpainting)", "VAEEncodeForInpaint": "VAE Encode (for Inpainting)",
"SetLatentNoiseMask": "Set Latent Noise Mask", "SetLatentNoiseMask": "Set Latent Noise Mask",
@ -2134,7 +2138,7 @@ NODE_DISPLAY_NAME_MAPPINGS = {
"LatentUpscaleBy": "Upscale Latent By", "LatentUpscaleBy": "Upscale Latent By",
"LatentComposite": "Latent Composite", "LatentComposite": "Latent Composite",
"LatentBlend": "Latent Blend", "LatentBlend": "Latent Blend",
"LatentFromBatch" : "Latent From Batch", "LatentFromBatch" : "Get Latent From Batch",
"RepeatLatentBatch": "Repeat Latent Batch", "RepeatLatentBatch": "Repeat Latent Batch",
# Image # Image
"EmptyImage": "Empty Image", "EmptyImage": "Empty Image",

View File

@ -939,11 +939,6 @@ components:
additionalProperties: true additionalProperties: true
description: The workflow graph to execute description: The workflow graph to execute
type: object type: object
prompt_id:
description: Optional client-supplied job id. Must be a UUID in canonical lowercase hyphenated form; it is echoed back in the response. Omitted or null means the server generates one.
format: uuid
nullable: true
type: string
workflow_id: workflow_id:
description: UUID identifying the cloud workflow entity to associate with this job description: UUID identifying the cloud workflow entity to associate with this job
type: string type: string
@ -1843,7 +1838,9 @@ paths:
application/json: application/json:
schema: schema:
$ref: '#/components/schemas/ErrorResponse' $ref: '#/components/schemas/ErrorResponse'
description: Invalid request (no fields provided) description: |
Invalid request — no fields provided, or `preview_id` is the zero UUID
(`INVALID_PREVIEW_ID`).
"401": "401":
content: content:
application/json: application/json:
@ -1855,7 +1852,10 @@ paths:
application/json: application/json:
schema: schema:
$ref: '#/components/schemas/ErrorResponse' $ref: '#/components/schemas/ErrorResponse'
description: Asset not found description: |
Asset not found — returned both when the asset being updated does
not exist and when `preview_id` does not reference an asset
accessible to the caller.
"500": "500":
content: content:
application/json: application/json:
@ -3143,6 +3143,12 @@ paths:
schema: schema:
$ref: '#/components/schemas/PromptErrorResponse' $ref: '#/components/schemas/PromptErrorResponse'
description: Payment required - Insufficient credits description: Payment required - Insufficient credits
"413":
content:
application/json:
schema:
$ref: '#/components/schemas/PromptErrorResponse'
description: Workflow JSON too large
"429": "429":
content: content:
application/json: application/json:

View File

@ -1,6 +1,6 @@
[project] [project]
name = "ComfyUI" name = "ComfyUI"
version = "0.24.0" version = "0.25.0"
readme = "README.md" readme = "README.md"
license = { file = "LICENSE" } license = { file = "LICENSE" }
requires-python = ">=3.10" requires-python = ">=3.10"