From e5b7140dcc5a88a6ad673a249eed223238e45a2b Mon Sep 17 00:00:00 2001 From: Matt Miller Date: Wed, 10 Jun 2026 16:55:25 -0700 Subject: [PATCH 01/52] feat(assets): add job_ids filter to GET /api/assets (#13998) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(assets): add job_ids filter to GET /api/assets Mirrors the existing cloud `job_ids` query param on the local Python server: clients can pass a comma-separated list (or repeated query params) of UUIDs to filter assets by their associated job. The `AssetReference.job_id` column already exists, so no migration is needed — this just plumbs the filter through schema → service → query. Marks the parameter as available in both runtimes by dropping the `[cloud-only]` description prefix and the `x-runtime: [cloud]` tag from the OpenAPI spec, per the OSS field-drift convention (absent runtime tag = populated by both local and cloud). * fix(assets): tighten job_ids — array schema, max_length, narrow except From cursor-reviews on the parent commit: - OpenAPI: declare job_ids as `type: array, items: string format: uuid` with `style: form, explode: true` so it matches the documented contract (and matches sibling include_tags/exclude_tags shape). Description now states both accepted shapes explicitly. - Schema: cap `job_ids` at 500 entries (max_length on the Pydantic field) so a client can't splice an unbounded list into the IN clauses. - Schema: drop `AttributeError` from the except — `raw` only contains `str` items by construction, so `uuid.UUID()` raises `ValueError` exclusively; the second clause was dead code. * fix(assets): tighten job_ids validator + add schema-level tests Aligns with the parallel hardening from draft PR #13848 (now closed as a duplicate). The validator now: - Raises ValueError on non-string list items (was: silently dropped). 
- Raises ValueError on non-string / non-list top-level values like dict or int (was: silently passed through to Pydantic's downstream coercion). Adds tests-unit/assets_test/queries/test_list_assets_query.py covering the validator end-to-end: CSV canonicalization, dedup order, default empty, invalid UUID, non-string list item, non-string non-list value, and the max_length=500 boundary. * feat(prompt): enforce canonical UUID prompt_id at job creation POST /prompt previously accepted any client-supplied prompt_id verbatim, str()-coercing even non-strings, and minting the literal job id "None" for an explicit JSON null. The new GET /api/assets job_ids filter matches stored job ids as canonical UUIDs exactly, so a non-UUID id minted a job whose assets could never be filtered. - validate_job_id (comfy_execution/jobs.py): requires a string in the canonical lowercase hyphenated UUID form; raises ValueError otherwise, including parseable-but-non-canonical spellings (uppercase, braced, URN, bare hex), which would otherwise be silently rewritten and then miss every exact-match lookup downstream (history keys, websocket correlation, /interrupt, the assets job_ids filter). - POST /prompt: absent or null prompt_id means the server mints uuid4; invalid means 400 invalid_prompt_id on the standard error envelope. - openapi.yaml: document the request-side prompt_id (format uuid, nullable) on PromptRequest. - tests: unit matrix for validate_job_id; integration tests against the booted server covering rejection, acceptance, and null handling. 
--------- Co-authored-by: guill --- app/assets/api/routes.py | 1 + app/assets/api/schemas_in.py | 36 ++++++++++ .../database/queries/asset_reference.py | 6 ++ app/assets/services/asset_management.py | 2 + comfy_execution/jobs.py | 21 ++++++ openapi.yaml | 5 ++ server.py | 18 ++++- .../assets_test/queries/test_asset_info.py | 50 ++++++++++++++ .../queries/test_list_assets_query.py | 60 ++++++++++++++++ .../assets_test/test_prompt_id_enforcement.py | 69 +++++++++++++++++++ tests/execution/test_jobs.py | 43 ++++++++++++ 11 files changed, 309 insertions(+), 2 deletions(-) create mode 100644 tests-unit/assets_test/queries/test_list_assets_query.py create mode 100644 tests-unit/assets_test/test_prompt_id_enforcement.py diff --git a/app/assets/api/routes.py b/app/assets/api/routes.py index 7ef462f5c..6c9a3200d 100644 --- a/app/assets/api/routes.py +++ b/app/assets/api/routes.py @@ -219,6 +219,7 @@ async def list_assets_route(request: web.Request) -> web.Response: exclude_tags=q.exclude_tags, name_contains=q.name_contains, metadata_filter=q.metadata_filter, + job_ids=q.job_ids, limit=q.limit, offset=q.offset, sort=sort, diff --git a/app/assets/api/schemas_in.py b/app/assets/api/schemas_in.py index af666746d..4ae18c65a 100644 --- a/app/assets/api/schemas_in.py +++ b/app/assets/api/schemas_in.py @@ -1,4 +1,5 @@ import json +import uuid from dataclasses import dataclass from typing import Any, Literal @@ -53,6 +54,7 @@ class ListAssetsQuery(BaseModel): include_tags: list[str] = Field(default_factory=list) exclude_tags: list[str] = Field(default_factory=list) name_contains: str | None = None + job_ids: list[str] = Field(default_factory=list, max_length=500) # Accept either a JSON string (query param) or a dict metadata_filter: dict[str, Any] | None = None @@ -86,6 +88,40 @@ class ListAssetsQuery(BaseModel): return out return v + @field_validator("job_ids", mode="before") + @classmethod + def _split_and_validate_job_ids(cls, v): + # Accept "uuid1,uuid2" or ["uuid1","uuid2"] or 
repeated query params. + # Each entry must parse as a UUID; canonicalized to lowercase hyphenated form. + if v is None: + return [] + if isinstance(v, str): + raw = [t.strip() for t in v.split(",") if t.strip()] + elif isinstance(v, list): + raw = [] + for item in v: + if not isinstance(item, str): + raise ValueError( + f"job_ids entries must be strings, got {type(item).__name__}" + ) + raw.extend([t.strip() for t in item.split(",") if t.strip()]) + else: + raise ValueError( + f"job_ids must be a string or list of strings, got {type(v).__name__}" + ) + + out: list[str] = [] + seen: set[str] = set() + for s in raw: + try: + canonical = str(uuid.UUID(s)) + except ValueError as e: + raise ValueError(f"job_ids must be UUIDs: {s!r}") from e + if canonical not in seen: + seen.add(canonical) + out.append(canonical) + return out + @field_validator("metadata_filter", mode="before") @classmethod def _parse_metadata_json(cls, v): diff --git a/app/assets/database/queries/asset_reference.py b/app/assets/database/queries/asset_reference.py index 792411800..33ded8a1c 100644 --- a/app/assets/database/queries/asset_reference.py +++ b/app/assets/database/queries/asset_reference.py @@ -264,6 +264,7 @@ def list_references_page( include_tags: Sequence[str] | None = None, exclude_tags: Sequence[str] | None = None, metadata_filter: dict | None = None, + job_ids: Sequence[str] | None = None, sort: str | None = None, order: str | None = None, after_cursor_value: object | None = None, @@ -293,6 +294,9 @@ def list_references_page( escaped, esc = escape_sql_like_string(name_contains) base = base.where(AssetReference.name.ilike(f"%{escaped}%", escape=esc)) + if job_ids: + base = base.where(AssetReference.job_id.in_(list(job_ids))) + base = apply_tag_filters(base, include_tags, exclude_tags) base = apply_metadata_filter(base, metadata_filter) @@ -345,6 +349,8 @@ def list_references_page( count_stmt = count_stmt.where( AssetReference.name.ilike(f"%{escaped}%", escape=esc) ) + if job_ids: + 
count_stmt = count_stmt.where(AssetReference.job_id.in_(list(job_ids))) count_stmt = apply_tag_filters(count_stmt, include_tags, exclude_tags) count_stmt = apply_metadata_filter(count_stmt, metadata_filter) diff --git a/app/assets/services/asset_management.py b/app/assets/services/asset_management.py index d4e4fc61c..53aec7a15 100644 --- a/app/assets/services/asset_management.py +++ b/app/assets/services/asset_management.py @@ -274,6 +274,7 @@ def list_assets_page( exclude_tags: Sequence[str] | None = None, name_contains: str | None = None, metadata_filter: dict | None = None, + job_ids: Sequence[str] | None = None, limit: int = 20, offset: int = 0, sort: str = "created_at", @@ -319,6 +320,7 @@ def list_assets_page( exclude_tags=exclude_tags, name_contains=name_contains, metadata_filter=metadata_filter, + job_ids=job_ids, limit=fetch_limit, offset=offset, sort=sort, diff --git a/comfy_execution/jobs.py b/comfy_execution/jobs.py index fcd7ef735..3fbcc3eb0 100644 --- a/comfy_execution/jobs.py +++ b/comfy_execution/jobs.py @@ -3,6 +3,7 @@ Job utilities for the /api/jobs endpoint. Provides normalization and helper functions for job status tracking. """ +import uuid from typing import Optional from comfy_api.internal import prune_dict @@ -19,6 +20,26 @@ class JobStatus: ALL = [PENDING, IN_PROGRESS, COMPLETED, FAILED, CANCELLED] +def validate_job_id(value) -> str: + """Validate a client-supplied job (prompt) id. + + Job ids must be UUIDs in the canonical lowercase hyphenated form. The id + is stored and compared verbatim everywhere downstream — history keys, + websocket events, /interrupt matching, and the assets ``job_ids`` filter + (a String(36) column matched exactly) — so accepting another spelling + would either rewrite the client's id behind its back or mint a job whose + outputs the filter can never find. Rejecting loudly beats both. + + Returns the id unchanged. Raises ValueError when the value is not a + string in canonical UUID form. 
+ """ + if not isinstance(value, str): + raise ValueError(f"job id must be a string, got {type(value).__name__}") + if str(uuid.UUID(value)) != value: + raise ValueError("job id must be a UUID in canonical lowercase hyphenated form") + return value + + # Media types that can be previewed in the frontend PREVIEWABLE_MEDIA_TYPES = frozenset({'images', 'video', 'audio', '3d', 'text'}) diff --git a/openapi.yaml b/openapi.yaml index c27ed7adf..58614103a 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -896,6 +896,11 @@ components: additionalProperties: true description: The workflow graph to execute type: object + prompt_id: + description: Optional client-supplied job id. Must be a UUID in canonical lowercase hyphenated form; it is echoed back in the response. Omitted or null means the server generates one. + format: uuid + nullable: true + type: string workflow_id: description: UUID identifying the cloud workflow entity to associate with this job type: string diff --git a/server.py b/server.py index a85c1e591..cc3b33a5c 100644 --- a/server.py +++ b/server.py @@ -8,7 +8,7 @@ import time import nodes import folder_paths import execution -from comfy_execution.jobs import JobStatus, get_job, get_all_jobs +from comfy_execution.jobs import JobStatus, get_job, get_all_jobs, validate_job_id import uuid import urllib import json @@ -942,7 +942,21 @@ class PromptServer(): if "prompt" in json_data: prompt = json_data["prompt"] - prompt_id = str(json_data.get("prompt_id", uuid.uuid4())) + client_prompt_id = json_data.get("prompt_id") + if client_prompt_id is None: + # Absent or explicit null: the server mints the id. 
+ prompt_id = str(uuid.uuid4()) + else: + try: + prompt_id = validate_job_id(client_prompt_id) + except ValueError: + error = { + "type": "invalid_prompt_id", + "message": "prompt_id must be a valid UUID", + "details": "prompt_id must be a UUID string in canonical lowercase hyphenated form; omit it to let the server generate one", + "extra_info": {} + } + return web.json_response({"error": error, "node_errors": {}}, status=400) partial_execution_targets = None if "partial_execution_targets" in json_data: diff --git a/tests-unit/assets_test/queries/test_asset_info.py b/tests-unit/assets_test/queries/test_asset_info.py index fe510e342..ba729a270 100644 --- a/tests-unit/assets_test/queries/test_asset_info.py +++ b/tests-unit/assets_test/queries/test_asset_info.py @@ -158,6 +158,56 @@ class TestListReferencesPage: refs, _, _ = list_references_page(session, sort="name", order="asc") assert refs[0].name == "large" + def test_job_ids_filter(self, session: Session): + asset = _make_asset(session, "hash1") + job_a = str(uuid.uuid4()) + job_b = str(uuid.uuid4()) + ref_a = _make_reference(session, asset, name="from_job_a") + ref_a.job_id = job_a + ref_b = _make_reference(session, asset, name="from_job_b") + ref_b.job_id = job_b + _make_reference(session, asset, name="no_job") + session.commit() + + # Single job filter + refs, _, total = list_references_page(session, job_ids=[job_a]) + assert total == 1 + assert refs[0].name == "from_job_a" + + # Multi-job filter (IN) + refs, _, total = list_references_page(session, job_ids=[job_a, job_b]) + names = sorted(r.name for r in refs) + assert total == 2 + assert names == ["from_job_a", "from_job_b"] + + # Unknown job id matches nothing + refs, _, total = list_references_page(session, job_ids=[str(uuid.uuid4())]) + assert total == 0 + assert refs == [] + + # Empty/None means no filter -> all three references + refs, _, total = list_references_page(session, job_ids=[]) + assert total == 3 + refs, _, total = 
list_references_page(session, job_ids=None) + assert total == 3 + + def test_job_ids_combined_with_other_filters(self, session: Session): + asset = _make_asset(session, "hash1") + job_a = str(uuid.uuid4()) + ref_match = _make_reference(session, asset, name="match.bin") + ref_match.job_id = job_a + ref_wrong_name = _make_reference(session, asset, name="other.bin") + ref_wrong_name.job_id = job_a + ref_wrong_job = _make_reference(session, asset, name="match.bin") + ref_wrong_job.job_id = str(uuid.uuid4()) + session.commit() + + refs, _, total = list_references_page( + session, job_ids=[job_a], name_contains="match" + ) + assert total == 1 + assert refs[0].id == ref_match.id + class TestFetchReferenceAssetAndTags: def test_returns_none_for_nonexistent(self, session: Session): diff --git a/tests-unit/assets_test/queries/test_list_assets_query.py b/tests-unit/assets_test/queries/test_list_assets_query.py new file mode 100644 index 000000000..e8d3430e2 --- /dev/null +++ b/tests-unit/assets_test/queries/test_list_assets_query.py @@ -0,0 +1,60 @@ +"""Schema-level unit tests for ListAssetsQuery (no DB required).""" +import uuid + +import pytest +from pydantic import ValidationError + +from app.assets.api.schemas_in import ListAssetsQuery + + +class TestJobIdsValidator: + def test_csv_string_parses_and_canonicalizes(self): + a = "AAAAAAAA-BBBB-CCCC-DDDD-EEEEEEEEEEEE" + b = "11111111-2222-3333-4444-555555555555" + q = ListAssetsQuery.model_validate({"job_ids": f"{a},{b}"}) + # Canonicalized to lowercase + assert q.job_ids == [a.lower(), b] + + def test_repeated_query_params_as_list(self): + a = "11111111-1111-1111-1111-111111111111" + b = "22222222-2222-2222-2222-222222222222" + q = ListAssetsQuery.model_validate({"job_ids": [a, b]}) + assert q.job_ids == [a, b] + + def test_dedup_preserves_first_seen_order(self): + a = "11111111-1111-1111-1111-111111111111" + b = "22222222-2222-2222-2222-222222222222" + q = ListAssetsQuery.model_validate({"job_ids": [a, b, a]}) + assert 
q.job_ids == [a, b] + + def test_default_empty(self): + q = ListAssetsQuery.model_validate({}) + assert q.job_ids == [] + + def test_invalid_uuid_rejected(self): + with pytest.raises(ValidationError) as exc: + ListAssetsQuery.model_validate({"job_ids": "not-a-uuid"}) + assert "must be UUIDs" in str(exc.value) + + def test_non_string_list_item_rejected(self): + with pytest.raises(ValidationError) as exc: + ListAssetsQuery.model_validate( + {"job_ids": ["11111111-1111-1111-1111-111111111111", 42]} + ) + assert "must be strings" in str(exc.value) + + def test_non_string_non_list_value_rejected(self): + with pytest.raises(ValidationError) as exc: + ListAssetsQuery.model_validate({"job_ids": {"bad": "shape"}}) + assert "must be a string or list of strings" in str(exc.value) + + def test_max_length_enforced(self): + too_many = [str(uuid.uuid4()) for _ in range(501)] + with pytest.raises(ValidationError) as exc: + ListAssetsQuery.model_validate({"job_ids": too_many}) + assert exc.value.errors()[0]["type"] == "too_long" + + def test_max_length_boundary_accepted(self): + at_cap = [str(uuid.uuid4()) for _ in range(500)] + q = ListAssetsQuery.model_validate({"job_ids": at_cap}) + assert len(q.job_ids) == 500 diff --git a/tests-unit/assets_test/test_prompt_id_enforcement.py b/tests-unit/assets_test/test_prompt_id_enforcement.py new file mode 100644 index 000000000..fb961beae --- /dev/null +++ b/tests-unit/assets_test/test_prompt_id_enforcement.py @@ -0,0 +1,69 @@ +"""POST /prompt enforces canonical-UUID job ids at creation time. + +Lives in assets_test because it uses this suite's booted-server fixture and +because the invariant exists for the assets pipeline: the GET /api/assets +``job_ids`` filter matches stored job ids exactly, so a job minted with a +non-canonical id would produce assets the filter can never find. 
+ +The prompt bodies here are intentionally invalid workflows — prompt_id +validation happens before workflow validation, so a rejected id returns +``invalid_prompt_id`` while an accepted id falls through to the ordinary +workflow-validation error (proving it cleared the id check). +""" +import requests + + +def _post_prompt(http: requests.Session, api_base: str, body: dict) -> requests.Response: + return http.post(api_base + "/prompt", json=body, timeout=30) + + +def _error_type(r: requests.Response) -> str: + return r.json()["error"]["type"] + + +def test_non_uuid_prompt_id_rejected(http: requests.Session, api_base: str): + r = _post_prompt(http, api_base, {"prompt": {}, "prompt_id": "not-a-uuid"}) + assert r.status_code == 400, r.text + assert _error_type(r) == "invalid_prompt_id" + + +def test_non_string_prompt_id_rejected(http: requests.Session, api_base: str): + # Previously str()-coerced (123 became the job id "123"); must now be a 400, + # not a 500 from uuid.UUID choking on a non-string. + r = _post_prompt(http, api_base, {"prompt": {}, "prompt_id": 123}) + assert r.status_code == 400, r.text + assert _error_type(r) == "invalid_prompt_id" + + +def test_non_canonical_uuid_rejected(http: requests.Session, api_base: str): + # Parseable as a UUID, but not the canonical lowercase form: rejected + # loudly rather than silently rewritten (downstream lookups match the + # stored id exactly). + r = _post_prompt( + http, + api_base, + {"prompt": {}, "prompt_id": "AAAAAAAA-BBBB-4CCC-8DDD-EEEEEEEEEEEE"}, + ) + assert r.status_code == 400, r.text + assert _error_type(r) == "invalid_prompt_id" + + +def test_canonical_uuid_accepted(http: requests.Session, api_base: str): + # The id clears validation; the empty workflow then fails ordinary prompt + # validation, proving the request got past the id check. 
+ r = _post_prompt( + http, + api_base, + {"prompt": {}, "prompt_id": "aaaaaaaa-bbbb-4ccc-8ddd-eeeeeeeeeeee"}, + ) + assert r.status_code == 400, r.text + assert _error_type(r) != "invalid_prompt_id" + + +def test_null_prompt_id_not_rejected(http: requests.Session, api_base: str): + # Explicit null means "server generates" and must not be rejected as an + # invalid id. (The minted id itself is not observable here because the + # workflow is invalid; unit tests cover validate_job_id directly.) + r = _post_prompt(http, api_base, {"prompt": {}, "prompt_id": None}) + assert r.status_code == 400, r.text + assert _error_type(r) != "invalid_prompt_id" diff --git a/tests/execution/test_jobs.py b/tests/execution/test_jobs.py index 814af5c13..30e47071d 100644 --- a/tests/execution/test_jobs.py +++ b/tests/execution/test_jobs.py @@ -1,5 +1,7 @@ """Unit tests for comfy_execution/jobs.py""" +import pytest + from comfy_execution.jobs import ( JobStatus, is_previewable, @@ -10,9 +12,50 @@ from comfy_execution.jobs import ( get_outputs_summary, apply_sorting, has_3d_extension, + validate_job_id, ) +class TestValidateJobId: + """validate_job_id guards job creation: POST /prompt rejects ids it raises on.""" + + def test_canonical_form_passes_through(self): + cid = "a1b2c3d4-e5f6-7a89-b0c1-d2e3f4a5b6c7" + assert validate_job_id(cid) == cid + + @pytest.mark.parametrize( + "variant", + [ + "A1B2C3D4-E5F6-7A89-B0C1-D2E3F4A5B6C7", # uppercase + "{a1b2c3d4-e5f6-7a89-b0c1-d2e3f4a5b6c7}", # braced + "urn:uuid:a1b2c3d4-e5f6-7a89-b0c1-d2e3f4a5b6c7", # URN + "a1b2c3d4e5f67a89b0c1d2e3f4a5b6c7", # bare hex + " a1b2c3d4-e5f6-7a89-b0c1-d2e3f4a5b6c7 ", # padded + ], + ) + def test_non_canonical_spellings_rejected(self, variant): + # uuid.UUID parses all of these except the padded one (which it already rejects), but accepting them would silently + # rewrite the client's id (history keys, websocket events, and the + # assets job_ids filter all match the stored form exactly).
+ with pytest.raises(ValueError): + validate_job_id(variant) + + @pytest.mark.parametrize( + "bad", + ["", "not-a-uuid", "prompt-123", "a1b2c3d4-e5f6-7a89-b0c1", "None"], + ) + def test_non_uuid_strings_rejected(self, bad): + with pytest.raises(ValueError): + validate_job_id(bad) + + @pytest.mark.parametrize("bad", [123, 1.5, True, None, ["a"], {"id": "x"}]) + def test_non_strings_rejected(self, bad): + # uuid.UUID raises AttributeError/TypeError on non-strings; the helper + # must normalize those to ValueError so callers need one except clause. + with pytest.raises(ValueError): + validate_job_id(bad) + + class TestJobStatus: """Test JobStatus constants.""" From ce200c0850182722cfd6e0f9f9bd3f619e48281e Mon Sep 17 00:00:00 2001 From: Matt Miller Date: Wed, 10 Jun 2026 17:04:52 -0700 Subject: [PATCH 02/52] feat(assets): include asset id in executed WebSocket message (#13862) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(assets): enrich executed WS message with asset metadata When --enable-assets is set, each file-type output entry in the `executed` WebSocket message now includes id, name, asset_hash, size, and mime_type — matching the shape already returned by /upload/image. The enrichment lives in comfy_execution/asset_enrichment.py (no torch dependency) and is called from both send sites in execution.py: freshly executed nodes register the file inline via register_file_in_place; cached node re-sends look up the existing AssetReference by file path to avoid re-hashing. Errors are caught per-entry so a failure never blocks the WS message from sending. * fix(assets): inject only id in executed WS message per Asset Identity RFC Per the Asset Identity RFC, the executed WebSocket payload should carry id alone — hash is already encoded in the filename, and name/preview_url/ size belong behind GET /api/assets/{id} rather than being pushed eagerly. 
Simplifies the DB lookup path: we only need ref.id, so the asset.hash null-check is no longer required as a fallback trigger. * fix(assets): reject path traversal when resolving output abs_path Subfolder/filename were joined and absolutized without containment check, so '..' segments or an absolute filename could escape the type's base directory and register an unrelated on-disk file as an asset. Add commonpath-based containment check; skip enrichment (warn, leave entry unchanged) when the resolved path escapes base. Catches ValueError from cross-drive paths on Windows. * docs(assets): drop Asset Identity RFC reference from docstring * docs(assets): trim docstring to what enrichment does, not what it doesn't * test(assets): use real platform paths so containment check works on Windows The previous test setup patched os.path.abspath to identity and used a POSIX-style '/output' base, which collided with Windows path separators in os.path.commonpath. Drop the abspath/join patches and use a real tempdir-rooted base so the containment check runs against actual platform paths. * refactor(assets): enrich at output-processing time, not in the WS send path Per review: enrichment lived inside the client_id-guarded send sites, so a headless run (no websocket client) never registered assets at all, and ui_outputs/history stored the un-enriched entries. Now output_ui is enriched once, right after the node produces it and before it is stored in ui_outputs — so registration happens regardless of connected clients, and the asset id flows into history and the execution cache for free. _send_cached_ui re-sends the stored (already-enriched) dict verbatim, which lets the DB-lookup-by-path fallback be deleted: every enrichment is now a fresh output, and register_file_in_place re-hashes on upsert so an overwritten path can never carry a stale id. 
--- comfy_execution/asset_enrichment.py | 66 ++++++ execution.py | 6 + .../execution_test/test_enrich_output.py | 205 ++++++++++++++++++ 3 files changed, 277 insertions(+) create mode 100644 comfy_execution/asset_enrichment.py create mode 100644 tests-unit/execution_test/test_enrich_output.py diff --git a/comfy_execution/asset_enrichment.py b/comfy_execution/asset_enrichment.py new file mode 100644 index 000000000..38e9496a8 --- /dev/null +++ b/comfy_execution/asset_enrichment.py @@ -0,0 +1,66 @@ +"""Enrich executed-node output entries with asset id.""" +import logging +import os + + +def enrich_output_with_assets(output_ui: dict) -> dict: + """Register file-type output entries as assets and inject their ``id``. + + Runs at output-processing time, once per produced output, when + --enable-assets is set. Returns a new dict; entries without a resolvable + on-disk file path are left unchanged. Errors are caught per-entry so a + failure never blocks execution or the other entries. + """ + from comfy.cli_args import args + if not args.enable_assets: + return output_ui + + import folder_paths + from app.assets.services.ingest import register_file_in_place, DependencyMissingError + + enriched = {} + for key, entries in output_ui.items(): + if not isinstance(entries, list): + enriched[key] = entries + continue + new_entries = [] + for entry in entries: + if not isinstance(entry, dict) or "filename" not in entry or "type" not in entry: + new_entries.append(entry) + continue + try: + base = folder_paths.get_directory_by_type(entry["type"]) + if base is None: + new_entries.append(entry) + continue + base_abs = os.path.abspath(base) + abs_path = os.path.abspath(os.path.join(base_abs, entry.get("subfolder") or "", entry["filename"])) + try: + if os.path.commonpath([base_abs, abs_path]) != base_abs: + raise ValueError("escapes base") + except ValueError: + logging.warning("Asset enrichment skipped (path escapes base): %s", entry.get("filename")) + new_entries.append(entry) + 
continue + if not os.path.isfile(abs_path): + new_entries.append(entry) + continue + + # Register unconditionally: the file was just produced, and + # register_file_in_place re-hashes so an overwritten path can + # never carry a stale id. + result = register_file_in_place( + abs_path=abs_path, + name=entry["filename"], + tags=[entry["type"]], + ) + + entry = dict(entry) + entry["id"] = result.ref.id + except DependencyMissingError: + logging.warning("Asset enrichment skipped (blake3 not available): %s", entry.get("filename")) + except Exception: + logging.warning("Failed to enrich output entry with asset id: %s", entry.get("filename"), exc_info=True) + new_entries.append(entry) + enriched[key] = new_entries + return enriched diff --git a/execution.py b/execution.py index 5246d651c..e6c6f39d6 100644 --- a/execution.py +++ b/execution.py @@ -40,6 +40,7 @@ from comfy_execution.graph_utils import GraphBuilder, is_link from comfy_execution.validation import validate_node_input from comfy_execution.progress import get_progress_state, reset_progress_state, add_progress_handler, WebUIProgressHandler from comfy_execution.utils import CurrentNodeContext +from comfy_execution.asset_enrichment import enrich_output_with_assets from comfy_api.internal import _ComfyNodeInternal, _NodeOutputInternal, first_real_override, is_class, make_locked_method_func from comfy_api.latest import io, _io from comfy_execution.cache_provider import _has_cache_providers, _get_cache_providers, _logger as _cache_logger @@ -418,6 +419,7 @@ def _is_intermediate_output(dynprompt, node_id): class_def = nodes.NODE_CLASS_MAPPINGS[class_type] return getattr(class_def, 'HAS_INTERMEDIATE_OUTPUT', False) + def _send_cached_ui(server, node_id, display_node_id, cached, prompt_id, ui_outputs): if server.client_id is None: return @@ -552,6 +554,10 @@ async def execute(server, dynprompt, caches, current_item, extra_data, executed, asyncio.create_task(await_completion()) return (ExecutionResult.PENDING, None, None) 
if len(output_ui) > 0: + # Enrich at output-processing time (not in the send path) so assets + # are registered even when no client is connected, and the asset id + # flows into ui_outputs and the cache alongside the raw entries. + output_ui = enrich_output_with_assets(output_ui) ui_outputs[unique_id] = { "meta": { "node_id": unique_id, diff --git a/tests-unit/execution_test/test_enrich_output.py b/tests-unit/execution_test/test_enrich_output.py new file mode 100644 index 000000000..61490c49e --- /dev/null +++ b/tests-unit/execution_test/test_enrich_output.py @@ -0,0 +1,205 @@ +"""Tests for enrich_output_with_assets in comfy_execution/asset_enrichment.py.""" +import os +import types +import unittest +from unittest.mock import MagicMock, patch + + +def _make_args(enable_assets: bool): + a = types.SimpleNamespace() + a.enable_assets = enable_assets + return a + + +def _make_register_result(ref_id="ref-id-2"): + result = MagicMock() + result.ref.id = ref_id + return result + + +# Platform-appropriate absolute base. tempfile.gettempdir() returns C:\... on +# Windows and /tmp on POSIX, so containment via commonpath behaves naturally. 
+_DEFAULT_BASE = os.path.join(__import__("tempfile").gettempdir(), "asset-enrichment-test-base") + + +def _mocked_modules(*, enable_assets=True, register_file_in_place=None, directory=_DEFAULT_BASE): + return { + "comfy.cli_args": MagicMock(args=_make_args(enable_assets)), + "folder_paths": MagicMock(get_directory_by_type=MagicMock(return_value=directory)), + "app.assets.services.ingest": MagicMock( + register_file_in_place=register_file_in_place or MagicMock(return_value=_make_register_result()), + DependencyMissingError=type("DependencyMissingError", (Exception,), {}), + ), + } + + +def _call(output_ui, *, enable_assets=True, file_exists=True, register_result=None, directory=_DEFAULT_BASE): + register_mock = MagicMock(return_value=register_result or _make_register_result()) + mocked = _mocked_modules( + enable_assets=enable_assets, + register_file_in_place=register_mock, + directory=directory, + ) + + # Only os.path.isfile is patched — abspath/join must run natively so the + # containment check sees real platform paths. 
+ with patch.dict("sys.modules", mocked), \ + patch("os.path.isfile", return_value=file_exists): + import importlib + import comfy_execution.asset_enrichment as mod + importlib.reload(mod) + return mod.enrich_output_with_assets(output_ui) + + +class TestEnrichOutputWithAssets(unittest.TestCase): + + def test_disabled_returns_unchanged(self): + output = {"images": [{"filename": "a.png", "subfolder": "", "type": "output"}]} + result = _call(output, enable_assets=False) + self.assertNotIn("id", result["images"][0]) + + def test_non_list_value_passed_through(self): + output = {"text": "hello"} + result = _call(output) + self.assertEqual(result["text"], "hello") + + def test_entry_without_filename_unchanged(self): + output = {"latent": [{"subfolder": "", "type": "output"}]} + result = _call(output) + self.assertNotIn("id", result["latent"][0]) + + def test_entry_without_type_unchanged(self): + output = {"data": [{"filename": "a.png", "subfolder": ""}]} + result = _call(output) + self.assertNotIn("id", result["data"][0]) + + def test_file_not_on_disk_unchanged(self): + output = {"images": [{"filename": "missing.png", "subfolder": "", "type": "output"}]} + result = _call(output, file_exists=False) + self.assertNotIn("id", result["images"][0]) + + def test_unknown_type_returns_none_directory_unchanged(self): + output = {"images": [{"filename": "a.png", "subfolder": "", "type": "unknown"}]} + result = _call(output, directory=None) + self.assertNotIn("id", result["images"][0]) + + def test_register_injects_only_id(self): + reg = _make_register_result(ref_id="inline-ref") + output = {"images": [{"filename": "new.png", "subfolder": "", "type": "output"}]} + result = _call(output, register_result=reg) + img = result["images"][0] + self.assertEqual(img["id"], "inline-ref") + # Only id is injected — no asset_hash, name, preview_url, size + self.assertNotIn("asset_hash", img) + self.assertNotIn("name", img) + self.assertNotIn("preview_url", img) + self.assertNotIn("size", img) + + 
def test_register_called_per_entry(self): + register_mock = MagicMock(return_value=_make_register_result()) + mocked = _mocked_modules(register_file_in_place=register_mock) + output = { + "images": [ + {"filename": "a.png", "subfolder": "", "type": "output"}, + {"filename": "b.png", "subfolder": "", "type": "output"}, + ] + } + + with patch.dict("sys.modules", mocked), \ + patch("os.path.isfile", return_value=True): + import importlib + import comfy_execution.asset_enrichment as mod + importlib.reload(mod) + mod.enrich_output_with_assets(output) + + self.assertEqual(register_mock.call_count, 2) + + def test_original_entry_not_mutated(self): + orig = {"filename": "a.png", "subfolder": "", "type": "output"} + output = {"images": [orig]} + _call(output) + self.assertNotIn("id", orig) + + def test_enrichment_error_does_not_block_sibling_entries(self): + call_count = [0] + good_reg = _make_register_result(ref_id="good-ref") + + def register_side_effect(abs_path, name, tags): + call_count[0] += 1 + if call_count[0] == 1: + raise RuntimeError("boom") + return good_reg + + mocked = _mocked_modules(register_file_in_place=register_side_effect) + + output = { + "images": [ + {"filename": "bad.png", "subfolder": "", "type": "output"}, + {"filename": "good.png", "subfolder": "", "type": "output"}, + ] + } + + with patch.dict("sys.modules", mocked), \ + patch("os.path.isfile", return_value=True): + import importlib + import comfy_execution.asset_enrichment as mod + importlib.reload(mod) + result = mod.enrich_output_with_assets(output) + + imgs = result["images"] + self.assertNotIn("id", imgs[0]) + self.assertEqual(imgs[1]["id"], "good-ref") + + def test_multiple_output_keys_all_enriched(self): + output = { + "images": [{"filename": "a.png", "subfolder": "", "type": "output"}], + "videos": [{"filename": "b.mp4", "subfolder": "", "type": "output"}], + } + result = _call(output) + self.assertIn("id", result["images"][0]) + self.assertIn("id", result["videos"][0]) + + def 
test_none_entry_in_list_unchanged(self): + output = {"images": [None, {"filename": "a.png", "subfolder": "", "type": "output"}]} + result = _call(output) + self.assertIsNone(result["images"][0]) + self.assertIn("id", result["images"][1]) + + def test_path_traversal_subfolder_skipped(self): + register_mock = MagicMock(return_value=_make_register_result()) + mocked = _mocked_modules(register_file_in_place=register_mock) + + output = {"images": [{"filename": "passwd", "subfolder": "../../etc", "type": "output"}]} + + # Do NOT patch os.path.abspath — real resolution is required for the containment check. + with patch.dict("sys.modules", mocked), \ + patch("os.path.isfile", return_value=True): + import importlib + import comfy_execution.asset_enrichment as mod + importlib.reload(mod) + result = mod.enrich_output_with_assets(output) + + self.assertNotIn("id", result["images"][0]) + register_mock.assert_not_called() + + def test_absolute_filename_skipped(self): + register_mock = MagicMock(return_value=_make_register_result()) + mocked = _mocked_modules(register_file_in_place=register_mock) + + # Absolute filename — os.path.join discards earlier components when a later one is absolute. 
+ absolute_filename = os.path.abspath(os.sep + "etc" + os.sep + "passwd") + output = {"images": [{"filename": absolute_filename, "subfolder": "", "type": "output"}]} + + with patch.dict("sys.modules", mocked), \ + patch("os.path.isfile", return_value=True): + import importlib + import comfy_execution.asset_enrichment as mod + importlib.reload(mod) + result = mod.enrich_output_with_assets(output) + + self.assertNotIn("id", result["images"][0]) + register_mock.assert_not_called() + + +if __name__ == "__main__": + unittest.main() From 431a1888d31114ef4959c8a9fb286a5cac8688f0 Mon Sep 17 00:00:00 2001 From: Matt Miller Date: Wed, 10 Jun 2026 19:23:01 -0700 Subject: [PATCH 03/52] revert(assets): drop job_ids filter from GET /api/assets (#14408) The job_ids query filter added in #13998 has no live consumer: the frontend Generated tab kept sourcing from GET /jobs, and the cloud side removed its equivalent filter from the shared asset spec. Carrying it on the local server only re-introduces Core<->Cloud drift on the shared contract, so remove it to match. Removed: the job_ids field + validator on ListAssetsQuery, the IN(...) clauses in list_references_page, the service/route passthrough, and the filter-only tests. Kept: the canonical-UUID prompt_id enforcement at job creation (also landed in #13998). It stands on its own -- job ids are matched verbatim by history keys, websocket correlation, and /interrupt -- and cloud inherits it by running core for execution, so no divergence is created. 
--- app/assets/api/routes.py | 1 - app/assets/api/schemas_in.py | 36 ----------- .../database/queries/asset_reference.py | 6 -- app/assets/services/asset_management.py | 2 - comfy_execution/jobs.py | 7 +-- .../assets_test/queries/test_asset_info.py | 50 ---------------- .../queries/test_list_assets_query.py | 60 ------------------- .../assets_test/test_prompt_id_enforcement.py | 8 +-- tests/execution/test_jobs.py | 4 +- 9 files changed, 9 insertions(+), 165 deletions(-) delete mode 100644 tests-unit/assets_test/queries/test_list_assets_query.py diff --git a/app/assets/api/routes.py b/app/assets/api/routes.py index 6c9a3200d..7ef462f5c 100644 --- a/app/assets/api/routes.py +++ b/app/assets/api/routes.py @@ -219,7 +219,6 @@ async def list_assets_route(request: web.Request) -> web.Response: exclude_tags=q.exclude_tags, name_contains=q.name_contains, metadata_filter=q.metadata_filter, - job_ids=q.job_ids, limit=q.limit, offset=q.offset, sort=sort, diff --git a/app/assets/api/schemas_in.py b/app/assets/api/schemas_in.py index 4ae18c65a..af666746d 100644 --- a/app/assets/api/schemas_in.py +++ b/app/assets/api/schemas_in.py @@ -1,5 +1,4 @@ import json -import uuid from dataclasses import dataclass from typing import Any, Literal @@ -54,7 +53,6 @@ class ListAssetsQuery(BaseModel): include_tags: list[str] = Field(default_factory=list) exclude_tags: list[str] = Field(default_factory=list) name_contains: str | None = None - job_ids: list[str] = Field(default_factory=list, max_length=500) # Accept either a JSON string (query param) or a dict metadata_filter: dict[str, Any] | None = None @@ -88,40 +86,6 @@ class ListAssetsQuery(BaseModel): return out return v - @field_validator("job_ids", mode="before") - @classmethod - def _split_and_validate_job_ids(cls, v): - # Accept "uuid1,uuid2" or ["uuid1","uuid2"] or repeated query params. - # Each entry must parse as a UUID; canonicalized to lowercase hyphenated form. 
- if v is None: - return [] - if isinstance(v, str): - raw = [t.strip() for t in v.split(",") if t.strip()] - elif isinstance(v, list): - raw = [] - for item in v: - if not isinstance(item, str): - raise ValueError( - f"job_ids entries must be strings, got {type(item).__name__}" - ) - raw.extend([t.strip() for t in item.split(",") if t.strip()]) - else: - raise ValueError( - f"job_ids must be a string or list of strings, got {type(v).__name__}" - ) - - out: list[str] = [] - seen: set[str] = set() - for s in raw: - try: - canonical = str(uuid.UUID(s)) - except ValueError as e: - raise ValueError(f"job_ids must be UUIDs: {s!r}") from e - if canonical not in seen: - seen.add(canonical) - out.append(canonical) - return out - @field_validator("metadata_filter", mode="before") @classmethod def _parse_metadata_json(cls, v): diff --git a/app/assets/database/queries/asset_reference.py b/app/assets/database/queries/asset_reference.py index 33ded8a1c..792411800 100644 --- a/app/assets/database/queries/asset_reference.py +++ b/app/assets/database/queries/asset_reference.py @@ -264,7 +264,6 @@ def list_references_page( include_tags: Sequence[str] | None = None, exclude_tags: Sequence[str] | None = None, metadata_filter: dict | None = None, - job_ids: Sequence[str] | None = None, sort: str | None = None, order: str | None = None, after_cursor_value: object | None = None, @@ -294,9 +293,6 @@ def list_references_page( escaped, esc = escape_sql_like_string(name_contains) base = base.where(AssetReference.name.ilike(f"%{escaped}%", escape=esc)) - if job_ids: - base = base.where(AssetReference.job_id.in_(list(job_ids))) - base = apply_tag_filters(base, include_tags, exclude_tags) base = apply_metadata_filter(base, metadata_filter) @@ -349,8 +345,6 @@ def list_references_page( count_stmt = count_stmt.where( AssetReference.name.ilike(f"%{escaped}%", escape=esc) ) - if job_ids: - count_stmt = count_stmt.where(AssetReference.job_id.in_(list(job_ids))) count_stmt = 
apply_tag_filters(count_stmt, include_tags, exclude_tags) count_stmt = apply_metadata_filter(count_stmt, metadata_filter) diff --git a/app/assets/services/asset_management.py b/app/assets/services/asset_management.py index 53aec7a15..d4e4fc61c 100644 --- a/app/assets/services/asset_management.py +++ b/app/assets/services/asset_management.py @@ -274,7 +274,6 @@ def list_assets_page( exclude_tags: Sequence[str] | None = None, name_contains: str | None = None, metadata_filter: dict | None = None, - job_ids: Sequence[str] | None = None, limit: int = 20, offset: int = 0, sort: str = "created_at", @@ -320,7 +319,6 @@ def list_assets_page( exclude_tags=exclude_tags, name_contains=name_contains, metadata_filter=metadata_filter, - job_ids=job_ids, limit=fetch_limit, offset=offset, sort=sort, diff --git a/comfy_execution/jobs.py b/comfy_execution/jobs.py index 3fbcc3eb0..20ebae155 100644 --- a/comfy_execution/jobs.py +++ b/comfy_execution/jobs.py @@ -25,10 +25,9 @@ def validate_job_id(value) -> str: Job ids must be UUIDs in the canonical lowercase hyphenated form. The id is stored and compared verbatim everywhere downstream — history keys, - websocket events, /interrupt matching, and the assets ``job_ids`` filter - (a String(36) column matched exactly) — so accepting another spelling - would either rewrite the client's id behind its back or mint a job whose - outputs the filter can never find. Rejecting loudly beats both. + websocket events, and /interrupt matching — so accepting another spelling + would silently rewrite the client's id and then miss every exact-match + lookup. Rejecting loudly beats that. Returns the id unchanged. Raises ValueError when the value is not a string in canonical UUID form. 
diff --git a/tests-unit/assets_test/queries/test_asset_info.py b/tests-unit/assets_test/queries/test_asset_info.py index ba729a270..fe510e342 100644 --- a/tests-unit/assets_test/queries/test_asset_info.py +++ b/tests-unit/assets_test/queries/test_asset_info.py @@ -158,56 +158,6 @@ class TestListReferencesPage: refs, _, _ = list_references_page(session, sort="name", order="asc") assert refs[0].name == "large" - def test_job_ids_filter(self, session: Session): - asset = _make_asset(session, "hash1") - job_a = str(uuid.uuid4()) - job_b = str(uuid.uuid4()) - ref_a = _make_reference(session, asset, name="from_job_a") - ref_a.job_id = job_a - ref_b = _make_reference(session, asset, name="from_job_b") - ref_b.job_id = job_b - _make_reference(session, asset, name="no_job") - session.commit() - - # Single job filter - refs, _, total = list_references_page(session, job_ids=[job_a]) - assert total == 1 - assert refs[0].name == "from_job_a" - - # Multi-job filter (IN) - refs, _, total = list_references_page(session, job_ids=[job_a, job_b]) - names = sorted(r.name for r in refs) - assert total == 2 - assert names == ["from_job_a", "from_job_b"] - - # Unknown job id matches nothing - refs, _, total = list_references_page(session, job_ids=[str(uuid.uuid4())]) - assert total == 0 - assert refs == [] - - # Empty/None means no filter -> all three references - refs, _, total = list_references_page(session, job_ids=[]) - assert total == 3 - refs, _, total = list_references_page(session, job_ids=None) - assert total == 3 - - def test_job_ids_combined_with_other_filters(self, session: Session): - asset = _make_asset(session, "hash1") - job_a = str(uuid.uuid4()) - ref_match = _make_reference(session, asset, name="match.bin") - ref_match.job_id = job_a - ref_wrong_name = _make_reference(session, asset, name="other.bin") - ref_wrong_name.job_id = job_a - ref_wrong_job = _make_reference(session, asset, name="match.bin") - ref_wrong_job.job_id = str(uuid.uuid4()) - session.commit() - - refs, 
_, total = list_references_page( - session, job_ids=[job_a], name_contains="match" - ) - assert total == 1 - assert refs[0].id == ref_match.id - class TestFetchReferenceAssetAndTags: def test_returns_none_for_nonexistent(self, session: Session): diff --git a/tests-unit/assets_test/queries/test_list_assets_query.py b/tests-unit/assets_test/queries/test_list_assets_query.py deleted file mode 100644 index e8d3430e2..000000000 --- a/tests-unit/assets_test/queries/test_list_assets_query.py +++ /dev/null @@ -1,60 +0,0 @@ -"""Schema-level unit tests for ListAssetsQuery (no DB required).""" -import uuid - -import pytest -from pydantic import ValidationError - -from app.assets.api.schemas_in import ListAssetsQuery - - -class TestJobIdsValidator: - def test_csv_string_parses_and_canonicalizes(self): - a = "AAAAAAAA-BBBB-CCCC-DDDD-EEEEEEEEEEEE" - b = "11111111-2222-3333-4444-555555555555" - q = ListAssetsQuery.model_validate({"job_ids": f"{a},{b}"}) - # Canonicalized to lowercase - assert q.job_ids == [a.lower(), b] - - def test_repeated_query_params_as_list(self): - a = "11111111-1111-1111-1111-111111111111" - b = "22222222-2222-2222-2222-222222222222" - q = ListAssetsQuery.model_validate({"job_ids": [a, b]}) - assert q.job_ids == [a, b] - - def test_dedup_preserves_first_seen_order(self): - a = "11111111-1111-1111-1111-111111111111" - b = "22222222-2222-2222-2222-222222222222" - q = ListAssetsQuery.model_validate({"job_ids": [a, b, a]}) - assert q.job_ids == [a, b] - - def test_default_empty(self): - q = ListAssetsQuery.model_validate({}) - assert q.job_ids == [] - - def test_invalid_uuid_rejected(self): - with pytest.raises(ValidationError) as exc: - ListAssetsQuery.model_validate({"job_ids": "not-a-uuid"}) - assert "must be UUIDs" in str(exc.value) - - def test_non_string_list_item_rejected(self): - with pytest.raises(ValidationError) as exc: - ListAssetsQuery.model_validate( - {"job_ids": ["11111111-1111-1111-1111-111111111111", 42]} - ) - assert "must be strings" in 
str(exc.value) - - def test_non_string_non_list_value_rejected(self): - with pytest.raises(ValidationError) as exc: - ListAssetsQuery.model_validate({"job_ids": {"bad": "shape"}}) - assert "must be a string or list of strings" in str(exc.value) - - def test_max_length_enforced(self): - too_many = [str(uuid.uuid4()) for _ in range(501)] - with pytest.raises(ValidationError) as exc: - ListAssetsQuery.model_validate({"job_ids": too_many}) - assert exc.value.errors()[0]["type"] == "too_long" - - def test_max_length_boundary_accepted(self): - at_cap = [str(uuid.uuid4()) for _ in range(500)] - q = ListAssetsQuery.model_validate({"job_ids": at_cap}) - assert len(q.job_ids) == 500 diff --git a/tests-unit/assets_test/test_prompt_id_enforcement.py b/tests-unit/assets_test/test_prompt_id_enforcement.py index fb961beae..86a755c9f 100644 --- a/tests-unit/assets_test/test_prompt_id_enforcement.py +++ b/tests-unit/assets_test/test_prompt_id_enforcement.py @@ -1,9 +1,9 @@ """POST /prompt enforces canonical-UUID job ids at creation time. -Lives in assets_test because it uses this suite's booted-server fixture and -because the invariant exists for the assets pipeline: the GET /api/assets -``job_ids`` filter matches stored job ids exactly, so a job minted with a -non-canonical id would produce assets the filter can never find. +Lives in assets_test because it uses this suite's booted-server fixture. The +invariant itself is pipeline-wide: a job id is stored and compared verbatim +downstream — history keys, websocket correlation, and /interrupt matching — +so a job minted with a non-canonical id would miss every exact-match lookup. 
The prompt bodies here are intentionally invalid workflows — prompt_id validation happens before workflow validation, so a rejected id returns diff --git a/tests/execution/test_jobs.py b/tests/execution/test_jobs.py index 30e47071d..f7cb612e4 100644 --- a/tests/execution/test_jobs.py +++ b/tests/execution/test_jobs.py @@ -35,8 +35,8 @@ class TestValidateJobId: ) def test_non_canonical_spellings_rejected(self, variant): # uuid.UUID parses all of these, but accepting them would silently - # rewrite the client's id (history keys, websocket events, and the - # assets job_ids filter all match the stored form exactly). + # rewrite the client's id (history keys, websocket events, and + # /interrupt matching all match the stored form exactly). with pytest.raises(ValueError): validate_job_id(variant) From 74ee826790035be831c960e4c4bd60051273a99a Mon Sep 17 00:00:00 2001 From: Comfy Org PR Bot Date: Thu, 11 Jun 2026 12:15:53 +0900 Subject: [PATCH 04/52] chore(openapi): sync shared API contract from cloud@e3c52ad (#14406) --- openapi.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/openapi.yaml b/openapi.yaml index 58614103a..6e203b1cd 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -1067,6 +1067,9 @@ components: comfyui_version: description: ComfyUI version type: string + deploy_environment: + description: How this ComfyUI instance is deployed (e.g. cloud, local-git, local-portable, local-desktop) + type: string embedded_python: description: Whether using embedded Python type: boolean From 33e6ebd0d92b270e9bd79ea74e967f7e23e7d7e8 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Wed, 10 Jun 2026 20:27:08 -0700 Subject: [PATCH 05/52] I don't think this actually works anymore. 
(#14403) --- README.md | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/README.md b/README.md index dc2389266..ee1024de5 100644 --- a/README.md +++ b/README.md @@ -462,16 +462,6 @@ To use the most up-to-date frontend version: This approach allows you to easily switch between the stable fortnightly release and the cutting-edge daily updates, or even specific versions for testing purposes. -### Accessing the Legacy Frontend - -If you need to use the legacy frontend for any reason, you can access it using the following command line argument: - -``` ---front-end-version Comfy-Org/ComfyUI_legacy_frontend@latest -``` - -This will use a snapshot of the legacy frontend preserved in the [ComfyUI Legacy Frontend repository](https://github.com/Comfy-Org/ComfyUI_legacy_frontend). - # QA ### Which GPU should I buy for this? From bda19b26048843f32ef41e94f83b4ef49d16c254 Mon Sep 17 00:00:00 2001 From: rattus <46076784+rattus128@users.noreply.github.com> Date: Thu, 11 Jun 2026 13:32:57 +1000 Subject: [PATCH 06/52] ops: tolerate already force casted dynamic weight (#14410) Some custom nodes .to weights completely out of load context which can wreak havoc if its for a model that is not active. Detect this condition and just let it fall-through to the non-dynamic loader straight up. --- comfy/ops.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/comfy/ops.py b/comfy/ops.py index 119177c37..3c9912aae 100644 --- a/comfy/ops.py +++ b/comfy/ops.py @@ -299,21 +299,21 @@ def cast_bias_weight(s, input=None, dtype=None, device=None, bias_dtype=None, of non_blocking = comfy.model_management.device_supports_non_blocking(device) - if hasattr(s, "_v"): + if hasattr(s, "_v") and comfy.model_management.is_device_cpu(device): #vbar doesn't support CPU weights, but some custom nodes have weird paths #that might switch the layer to the CPU and expect it to work. 
We have to take #a clone conservatively as we are mmapped and some SFT files are packed misaligned #If you are a custom node author reading this, please move your layer to the GPU #or declare your ModelPatcher as CPU in the first place. - if comfy.model_management.is_device_cpu(device): - materialize_meta_param(s, ["weight", "bias"]) - weight = s.weight.to(dtype=dtype, copy=True) - if isinstance(weight, QuantizedTensor): - weight = weight.dequantize() - bias = s.bias.to(dtype=bias_dtype, copy=True) if s.bias is not None else None - return format_return((weight, bias, (None, None, None)), offloadable) + materialize_meta_param(s, ["weight", "bias"]) + weight = s.weight.to(dtype=dtype, copy=True) + if isinstance(weight, QuantizedTensor): + weight = weight.dequantize() + bias = s.bias.to(dtype=bias_dtype, copy=True) if s.bias is not None else None + return format_return((weight, bias, (None, None, None)), offloadable) + elif hasattr(s, "_v") and s.weight.device != device: prefetched = hasattr(s, "_prefetch") offload_stream = None offload_device = None From 91187c58d946f237a051a98eb3ef3ccd28cf926f Mon Sep 17 00:00:00 2001 From: Barish Ozbay <17261091+drozbay@users.noreply.github.com> Date: Wed, 10 Jun 2026 23:37:43 -0600 Subject: [PATCH 07/52] Improve context window resizing for SCAIL2 (CORE-286) (#14394) --- comfy/model_base.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/comfy/model_base.py b/comfy/model_base.py index 2289e0812..ab4a11022 100644 --- a/comfy/model_base.py +++ b/comfy/model_base.py @@ -1816,7 +1816,24 @@ class WAN21_SCAIL2(WAN21_SCAIL): def resize_cond_for_context_window(self, cond_key, cond_value, window, x_in, device, retain_index_list=[]): if cond_key in ("sam_latents", "pose_latents"): - return comfy.context_windows.slice_cond(cond_value, window, x_in, device, temporal_dim=2, temporal_offset=1) + # Return sliced view omitting retain_index_list + return comfy.context_windows.slice_cond(cond_value, window, 
x_in, device, temporal_dim=2, temporal_offset=0) + if cond_key == "ref_mask_latents" and hasattr(cond_value, "cond") and isinstance(cond_value.cond, torch.Tensor): + # The ref mask is just a single frame padded with frames of zeros, so just grab the first frames for all windows + full_ref_mask = cond_value.cond + video_frame_count = x_in.shape[2] + if full_ref_mask.shape[2] != video_frame_count + 1: + return None + window_length = len(window.index_list) + + # Account for the causal anchor frame if it exists + anchor_index = getattr(window, "causal_anchor_index", None) + if anchor_index is not None and anchor_index >= 0: + window_length += 1 + + window_ref_mask = full_ref_mask[:, :, :window_length + 1].to(device) + return cond_value._copy_with(window_ref_mask) + return super().resize_cond_for_context_window(cond_key, cond_value, window, x_in, device, retain_index_list=retain_index_list) def concat_cond(self, **kwargs): From b97e60fc6beb795adaae404f5d43101dacf12f6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Sepp=C3=A4nen?= <40791699+kijai@users.noreply.github.com> Date: Thu, 11 Jun 2026 11:17:04 +0300 Subject: [PATCH 08/52] Fix SCAIL-2 reference mask background convention (#14415) --- comfy_extras/nodes_scail.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/comfy_extras/nodes_scail.py b/comfy_extras/nodes_scail.py index a740442de..bba0942d7 100644 --- a/comfy_extras/nodes_scail.py +++ b/comfy_extras/nodes_scail.py @@ -267,7 +267,8 @@ class SCAIL2ColoredMask(io.ComfyNode): io.Combo.Input("sort_by", options=["none", "left_to_right", "area"], default="left_to_right", tooltip="Order in which palette colors are assigned to the tracked objects (applied to both reference and pose video so each identity keeps the same color). 
left_to_right = leftmost object (by first-frame centroid) gets the first color; area = biggest object (by first-frame mask area) gets the first color; none = keep SAM3's order."), io.Boolean.Input("replacement_mode", default=False, - tooltip="False = mask_video has black bg (Animation Mode). True = white bg (Replacement Mode). Set the matching replacement_mode on WanSCAILToVideo. reference_image_mask is always black-bg regardless."), + tooltip="False = Animation Mode (pose_video_mask has black background, reference_image_mask has white background). " + "True = Replacement Mode (pose_video_mask has white background, reference_image_mask has black background)."), ], outputs=[ io.Image.Output("pose_video_mask"), @@ -296,14 +297,17 @@ class SCAIL2ColoredMask(io.ComfyNode): return td drv = _prep(driving_track_data) + # Animation: driving=black, ref=white. Replacement: driving=white, ref=black. mask_video = _render_colored_masks(drv, "white" if replacement_mode else "black") + ref_bg = "black" if replacement_mode else "white" if ref_track_data is not None: ref = _prep(ref_track_data) - reference_image_mask = _render_colored_masks(ref, "black") + reference_image_mask = _render_colored_masks(ref, ref_bg) else: H, W = drv["orig_size"] - reference_image_mask = torch.zeros(1, H, W, 3, device=comfy.model_management.intermediate_device(), dtype=comfy.model_management.intermediate_dtype()) + fill_value = 1.0 if ref_bg == "white" else 0.0 + reference_image_mask = torch.full((1, H, W, 3), fill_value, device=comfy.model_management.intermediate_device(), dtype=comfy.model_management.intermediate_dtype()) return io.NodeOutput(mask_video, reference_image_mask) From ef470b61e4eab7de3319a83e689a9f236138102f Mon Sep 17 00:00:00 2001 From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com> Date: Thu, 11 Jun 2026 11:28:17 +0300 Subject: [PATCH 09/52] [Partner Nodes] fix(GPT Image): handle mismatched image sizes returned when size="auto" (#14414) Signed-off-by: bigcat88 --- 
comfy_api_nodes/nodes_openai.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/comfy_api_nodes/nodes_openai.py b/comfy_api_nodes/nodes_openai.py index 0fe5fb9d0..ad62f2164 100644 --- a/comfy_api_nodes/nodes_openai.py +++ b/comfy_api_nodes/nodes_openai.py @@ -9,6 +9,7 @@ from PIL import Image from typing_extensions import override import folder_paths +from comfy.utils import common_upscale from comfy_api.latest import IO, ComfyExtension, Input from comfy_api_nodes.apis.openai import ( InputFileContent, @@ -62,7 +63,8 @@ async def validate_and_cast_response(response, timeout: int = None) -> torch.Ten timeout: Request timeout in seconds. Defaults to None (no timeout). Returns: - A torch.Tensor representing the image (1, H, W, C). + A torch.Tensor of shape (N, H, W, C) with all returned images; images whose + dimensions differ from the first image's are resized to match it. Raises: ValueError: If the response is not valid. @@ -89,6 +91,14 @@ async def validate_and_cast_response(response, timeout: int = None) -> torch.Ten arr = np.asarray(pil_img).astype(np.float32) / 255.0 image_tensors.append(torch.from_numpy(arr)) + # With size="auto" the API can return images whose dimensions differ by a few pixels within a single response + # resize them to the first image's dimensions so they can be stacked into one batch. 
+ ref_h, ref_w = image_tensors[0].shape[:2] + for i, t in enumerate(image_tensors): + if t.shape[:2] != (ref_h, ref_w): + samples = t.unsqueeze(0).movedim(-1, 1) + samples = common_upscale(samples, ref_w, ref_h, "bilinear", "center") + image_tensors[i] = samples.movedim(1, -1).squeeze(0) return torch.stack(image_tensors, dim=0) From fb991e2c1e7476809d566a4620c2132e05a466dd Mon Sep 17 00:00:00 2001 From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com> Date: Thu, 11 Jun 2026 17:43:35 +0300 Subject: [PATCH 10/52] [Partner Nodes] fix(KlingTextToVideoNode): validation error for "kling-v2-master" model (#14418) Signed-off-by: bigcat88 --- comfy_api_nodes/apis/__init__.py | 9 +-------- comfy_api_nodes/nodes_kling.py | 2 +- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/comfy_api_nodes/apis/__init__.py b/comfy_api_nodes/apis/__init__.py index 9c4cfb9b6..9a7049ea2 100644 --- a/comfy_api_nodes/apis/__init__.py +++ b/comfy_api_nodes/apis/__init__.py @@ -1310,13 +1310,6 @@ class KlingTaskStatus(str, Enum): failed = 'failed' -class KlingTextToVideoModelName(str, Enum): - kling_v1 = 'kling-v1' - kling_v1_6 = 'kling-v1-6' - kling_v2_1_master = 'kling-v2-1-master' - kling_v2_5_turbo = 'kling-v2-5-turbo' - - class KlingVideoGenAspectRatio(str, Enum): field_16_9 = '16:9' field_9_16 = '9:16' @@ -5179,7 +5172,7 @@ class KlingText2VideoRequest(BaseModel): duration: Optional[KlingVideoGenDuration] = '5' external_task_id: Optional[str] = Field(None, description='Customized Task ID') mode: Optional[KlingVideoGenMode] = 'std' - model_name: Optional[KlingTextToVideoModelName] = 'kling-v1' + model_name: Optional[str] = 'kling-v1' negative_prompt: Optional[str] = Field( None, description='Negative text prompt', max_length=2500 ) diff --git a/comfy_api_nodes/nodes_kling.py b/comfy_api_nodes/nodes_kling.py index d11e42540..c81d3503d 100644 --- a/comfy_api_nodes/nodes_kling.py +++ b/comfy_api_nodes/nodes_kling.py @@ -436,7 +436,7 @@ async def execute_text2video( 
negative_prompt=negative_prompt if negative_prompt else None, duration=KlingVideoGenDuration(duration), mode=KlingVideoGenMode(model_mode), - model_name=KlingVideoGenModelName(model_name), + model_name=model_name, cfg_scale=cfg_scale, aspect_ratio=KlingVideoGenAspectRatio(aspect_ratio), camera_control=camera_control, From befc321438bd170b647ea1b4179b00b616858c9c Mon Sep 17 00:00:00 2001 From: Jedrzej Kosinski Date: Thu, 11 Jun 2026 15:45:22 -0700 Subject: [PATCH 11/52] Make --enable-manager-legacy-ui imply --enable-manager (#14421) --- README.md | 2 +- comfy/cli_args.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ee1024de5..786a14166 100644 --- a/README.md +++ b/README.md @@ -364,7 +364,7 @@ For models compatible with Iluvatar Extension for PyTorch. Here's a step-by-step | Flag | Description | |------|-------------| | `--enable-manager` | Enable ComfyUI-Manager | -| `--enable-manager-legacy-ui` | Use the legacy manager UI instead of the new UI (requires `--enable-manager`) | +| `--enable-manager-legacy-ui` | Use the legacy manager UI instead of the new UI (implies `--enable-manager`) | | `--disable-manager-ui` | Disable the manager UI and endpoints while keeping background features like security checks and scheduled installation completion (requires `--enable-manager`) | diff --git a/comfy/cli_args.py b/comfy/cli_args.py index cba0dfa34..22f621cf5 100644 --- a/comfy/cli_args.py +++ b/comfy/cli_args.py @@ -133,7 +133,7 @@ upcast.add_argument("--dont-upcast-attention", action="store_true", help="Disabl parser.add_argument("--enable-manager", action="store_true", help="Enable the ComfyUI-Manager feature.") manager_group = parser.add_mutually_exclusive_group() manager_group.add_argument("--disable-manager-ui", action="store_true", help="Disables only the ComfyUI-Manager UI and endpoints. 
Scheduled installations and similar background tasks will still operate.") -manager_group.add_argument("--enable-manager-legacy-ui", action="store_true", help="Enables the legacy UI of ComfyUI-Manager") +manager_group.add_argument("--enable-manager-legacy-ui", action="store_true", help="Enables the legacy UI of ComfyUI-Manager. Implies --enable-manager.") vram_group = parser.add_mutually_exclusive_group() @@ -258,6 +258,10 @@ if args.disable_auto_launch: if args.force_fp16: args.fp16_unet = True +# '--enable-manager-legacy-ui' is meaningless unless the manager is enabled, so imply '--enable-manager'. +if args.enable_manager_legacy_ui: + args.enable_manager = True + # '--fast' is not provided, use an empty set if args.fast is None: From 10d466b0e3a8c48f7dcb80def8feee0dc3345543 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Thu, 11 Jun 2026 16:38:06 -0700 Subject: [PATCH 12/52] Don't crash when using flux kv cache with split batches. (#14422) --- comfy_extras/nodes_flux.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/comfy_extras/nodes_flux.py b/comfy_extras/nodes_flux.py index afc663b22..ef1757ae5 100644 --- a/comfy_extras/nodes_flux.py +++ b/comfy_extras/nodes_flux.py @@ -245,6 +245,11 @@ class KV_Attn_Input: cache_key = "{}_{}".format(extra_options["block_type"], extra_options["block_index"]) if cache_key in self.cache: kk, vv = self.cache[cache_key] + + # Fix batch size changing. 
+ kk = comfy.utils.repeat_to_batch_size(kk, k.shape[0]) + vv = comfy.utils.repeat_to_batch_size(vv, v.shape[0]) + self.set_cache = False return {"q": q, "k": torch.cat((k, kk), dim=2), "v": torch.cat((v, vv), dim=2)} From bc5f8eca3bd017542ffbb4e53ca76b0b5abfed2f Mon Sep 17 00:00:00 2001 From: Robin Huang Date: Thu, 11 Jun 2026 18:20:44 -0700 Subject: [PATCH 13/52] Add Comfy-Usage-Source pass-through for API node requests (#14404) --- comfy_api/latest/_io.py | 10 +++++++++- comfy_api_nodes/nodes_sonilo.py | 5 ++--- comfy_api_nodes/util/_helpers.py | 25 ++++++++++++++++++++++++ comfy_api_nodes/util/client.py | 7 ++----- comfy_api_nodes/util/download_helpers.py | 4 ++-- execution.py | 4 ++++ server.py | 5 +++++ 7 files changed, 49 insertions(+), 11 deletions(-) diff --git a/comfy_api/latest/_io.py b/comfy_api/latest/_io.py index 37614a4c3..012fae3ac 100644 --- a/comfy_api/latest/_io.py +++ b/comfy_api/latest/_io.py @@ -1400,7 +1400,8 @@ class V3Data(TypedDict): class HiddenHolder: def __init__(self, unique_id: str, prompt: Any, extra_pnginfo: Any, dynprompt: Any, - auth_token_comfy_org: str, api_key_comfy_org: str, **kwargs): + auth_token_comfy_org: str, api_key_comfy_org: str, + comfy_usage_source: str = None, **kwargs): self.unique_id = unique_id """UNIQUE_ID is the unique identifier of the node, and matches the id property of the node on the client side. It is commonly used in client-server communications (see messages).""" self.prompt = prompt @@ -1413,6 +1414,8 @@ class HiddenHolder: """AUTH_TOKEN_COMFY_ORG is a token acquired from signing into a ComfyOrg account on frontend.""" self.api_key_comfy_org = api_key_comfy_org """API_KEY_COMFY_ORG is an API Key generated by ComfyOrg that allows skipping signing into a ComfyOrg account on frontend.""" + self.comfy_usage_source = comfy_usage_source + """COMFY_USAGE_SOURCE identifies the client that submitted the prompt (e.g. 
comfyui-frontend, comfy-cli, comfyui-mcp); forwarded to API nodes' upstream requests via the Comfy-Usage-Source header.""" def __getattr__(self, key: str): '''If hidden variable not found, return None.''' @@ -1429,6 +1432,7 @@ class HiddenHolder: dynprompt=d.get(Hidden.dynprompt, None), auth_token_comfy_org=d.get(Hidden.auth_token_comfy_org, None), api_key_comfy_org=d.get(Hidden.api_key_comfy_org, None), + comfy_usage_source=d.get(Hidden.comfy_usage_source, None), ) @classmethod @@ -1451,6 +1455,8 @@ class Hidden(str, Enum): """AUTH_TOKEN_COMFY_ORG is a token acquired from signing into a ComfyOrg account on frontend.""" api_key_comfy_org = "API_KEY_COMFY_ORG" """API_KEY_COMFY_ORG is an API Key generated by ComfyOrg that allows skipping signing into a ComfyOrg account on frontend.""" + comfy_usage_source = "COMFY_USAGE_SOURCE" + """COMFY_USAGE_SOURCE identifies the client that submitted the prompt (e.g. comfyui-frontend, comfy-cli, comfyui-mcp); forwarded to API nodes' upstream requests via the Comfy-Usage-Source header.""" @dataclass @@ -1654,6 +1660,8 @@ class Schema: self.hidden.append(Hidden.auth_token_comfy_org) if Hidden.api_key_comfy_org not in self.hidden: self.hidden.append(Hidden.api_key_comfy_org) + if Hidden.comfy_usage_source not in self.hidden: + self.hidden.append(Hidden.comfy_usage_source) # if is an output_node, will need prompt and extra_pnginfo if self.is_output_node: if Hidden.prompt not in self.hidden: diff --git a/comfy_api_nodes/nodes_sonilo.py b/comfy_api_nodes/nodes_sonilo.py index 9ce896ed0..24a9a0b06 100644 --- a/comfy_api_nodes/nodes_sonilo.py +++ b/comfy_api_nodes/nodes_sonilo.py @@ -16,7 +16,7 @@ from comfy_api_nodes.util import ( ) from comfy_api_nodes.util._helpers import ( default_base_url, - get_auth_header, + get_comfy_api_headers, get_node_id, is_processing_interrupted, ) @@ -174,8 +174,7 @@ async def _stream_sonilo_music( """POST ``form`` to Sonilo, read the NDJSON stream, and return the first stream's audio bytes.""" url = 
urljoin(default_base_url().rstrip("/") + "/", endpoint.path.lstrip("/")) - headers: dict[str, str] = {} - headers.update(get_auth_header(cls)) + headers = get_comfy_api_headers(cls) headers.update(endpoint.headers) node_id = get_node_id(cls) diff --git a/comfy_api_nodes/util/_helpers.py b/comfy_api_nodes/util/_helpers.py index 648defe3d..83cf7b001 100644 --- a/comfy_api_nodes/util/_helpers.py +++ b/comfy_api_nodes/util/_helpers.py @@ -9,6 +9,7 @@ from io import BytesIO from yarl import URL from comfy.cli_args import args +from comfy.deploy_environment import get_deploy_environment from comfy.model_management import processing_interrupted from comfy_api.latest import IO @@ -35,6 +36,30 @@ def get_auth_header(node_cls: type[IO.ComfyNode]) -> dict[str, str]: return {} +def get_usage_source(node_cls: type[IO.ComfyNode]) -> str: + """Source of the prompt that triggered this API node. + + Defaults to "comfyui-api" when the submitting client didn't identify itself, + i.e. a direct API call to this server. + """ + return node_cls.hidden.comfy_usage_source or "comfyui-api" + + +def get_comfy_api_headers(node_cls: type[IO.ComfyNode]) -> dict[str, str]: + """Common headers (auth, deploy environment, usage source) for Comfy API requests. + + Centralizes the shared header set so every Comfy API request sends a consistent + set and new shared headers only need to be added in one place. Intended for + relative/cloud URLs resolved against ``default_base_url()``; because the result + includes auth, callers must not attach it to arbitrary absolute/presigned URLs. 
+ """ + return { + **get_auth_header(node_cls), + "Comfy-Env": get_deploy_environment(), + "Comfy-Usage-Source": get_usage_source(node_cls), + } + + def default_base_url() -> str: return getattr(args, "comfy_api_base", "https://api.comfy.org") diff --git a/comfy_api_nodes/util/client.py b/comfy_api_nodes/util/client.py index 57c501724..adcde7bcb 100644 --- a/comfy_api_nodes/util/client.py +++ b/comfy_api_nodes/util/client.py @@ -19,12 +19,10 @@ from comfy import utils from comfy_api.latest import IO from server import PromptServer -from comfy.deploy_environment import get_deploy_environment - from . import request_logger from ._helpers import ( default_base_url, - get_auth_header, + get_comfy_api_headers, get_node_id, is_processing_interrupted, sleep_with_interrupt, @@ -645,8 +643,7 @@ async def _request_base(cfg: _RequestConfig, expect_binary: bool): payload_headers = {"Accept": "*/*"} if expect_binary else {"Accept": "application/json"} if not parsed_url.scheme and not parsed_url.netloc: # is URL relative? - payload_headers.update(get_auth_header(cfg.node_cls)) - payload_headers["Comfy-Env"] = get_deploy_environment() + payload_headers.update(get_comfy_api_headers(cfg.node_cls)) if cfg.endpoint.headers: payload_headers.update(cfg.endpoint.headers) diff --git a/comfy_api_nodes/util/download_helpers.py b/comfy_api_nodes/util/download_helpers.py index aa588d038..0ec3c6e66 100644 --- a/comfy_api_nodes/util/download_helpers.py +++ b/comfy_api_nodes/util/download_helpers.py @@ -17,7 +17,7 @@ from folder_paths import get_output_directory from . 
import request_logger from ._helpers import ( default_base_url, - get_auth_header, + get_comfy_api_headers, is_processing_interrupted, sleep_with_interrupt, to_aiohttp_url, @@ -64,7 +64,7 @@ async def download_url_to_bytesio( if cls is None: raise ValueError("For relative 'cloud' paths, the `cls` parameter is required.") url = urljoin(default_base_url().rstrip("/") + "/", url.lstrip("/")) - headers = get_auth_header(cls) + headers = get_comfy_api_headers(cls) while True: attempt += 1 diff --git a/execution.py b/execution.py index e6c6f39d6..9e16e451d 100644 --- a/execution.py +++ b/execution.py @@ -200,6 +200,8 @@ def get_input_data(inputs, class_def, unique_id, execution_list=None, dynprompt= hidden_inputs_v3[io.Hidden.auth_token_comfy_org] = extra_data.get("auth_token_comfy_org", None) if io.Hidden.api_key_comfy_org.name in hidden: hidden_inputs_v3[io.Hidden.api_key_comfy_org] = extra_data.get("api_key_comfy_org", None) + if io.Hidden.comfy_usage_source.name in hidden: + hidden_inputs_v3[io.Hidden.comfy_usage_source] = extra_data.get("comfy_usage_source", None) else: if "hidden" in valid_inputs: h = valid_inputs["hidden"] @@ -216,6 +218,8 @@ def get_input_data(inputs, class_def, unique_id, execution_list=None, dynprompt= input_data_all[x] = [extra_data.get("auth_token_comfy_org", None)] if h[x] == "API_KEY_COMFY_ORG": input_data_all[x] = [extra_data.get("api_key_comfy_org", None)] + if h[x] == "COMFY_USAGE_SOURCE": + input_data_all[x] = [extra_data.get("comfy_usage_source", None)] v3_data["hidden_inputs"] = hidden_inputs_v3 return input_data_all, missing_keys, v3_data diff --git a/server.py b/server.py index cc3b33a5c..ccc92e5ab 100644 --- a/server.py +++ b/server.py @@ -971,6 +971,11 @@ class PromptServer(): if "client_id" in json_data: extra_data["client_id"] = json_data["client_id"] + + if "comfy_usage_source" not in extra_data: + usage_source = request.headers.get("Comfy-Usage-Source") + if usage_source: + extra_data["comfy_usage_source"] = usage_source if 
valid[0]: outputs_to_execute = valid[2] sensitive = {} From 822aca19836cd75c815631db23c3ad742d1f7d5e Mon Sep 17 00:00:00 2001 From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com> Date: Fri, 12 Jun 2026 04:24:54 +0300 Subject: [PATCH 14/52] [Partner Nodes] feat: enable Bria Replace Background node (#14397) --- comfy_api_nodes/nodes_bria.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/comfy_api_nodes/nodes_bria.py b/comfy_api_nodes/nodes_bria.py index e138fafa9..090154afb 100644 --- a/comfy_api_nodes/nodes_bria.py +++ b/comfy_api_nodes/nodes_bria.py @@ -289,7 +289,7 @@ class BriaRemoveVideoBackground(IO.ComfyNode): ], is_api_node=True, price_badge=IO.PriceBadge( - expr="""{"type":"usd","usd":0.14,"format":{"suffix":"/second"}}""", + expr="""{"type":"usd","usd":0.0042,"format":{"suffix":"/second"}}""", ), ) @@ -357,7 +357,7 @@ class BriaVideoGreenScreen(IO.ComfyNode): ], is_api_node=True, price_badge=IO.PriceBadge( - expr="""{"type":"usd","usd":0.14,"format":{"suffix":"/second"}}""", + expr="""{"type":"usd","usd":0.0042,"format":{"suffix":"/second"}}""", ), ) @@ -433,7 +433,7 @@ class BriaVideoReplaceBackground(IO.ComfyNode): ], is_api_node=True, price_badge=IO.PriceBadge( - expr="""{"type":"usd","usd":0.14,"format":{"suffix":"/second"}}""", + expr="""{"type":"usd","usd":0.0042,"format":{"suffix":"/second"}}""", ), ) @@ -452,7 +452,10 @@ class BriaVideoReplaceBackground(IO.ComfyNode): validate_video_duration(background_video, max_duration=60.0) background_url = await upload_video_to_comfyapi(cls, background_video, wait_label="Uploading background") else: - background_url = await upload_image_to_comfyapi(cls, background_image, wait_label="Uploading background") + # Bria's replace_background 500s on RGBA, so drop the alpha channel before upload. 
+ background_url = await upload_image_to_comfyapi( + cls, background_image[:, :, :, :3], wait_label="Uploading background" + ) response = await sync_op( cls, ApiEndpoint(path="/proxy/bria/v2/video/edit/replace_background", method="POST"), @@ -530,7 +533,7 @@ class BriaTransparentVideoBackground(IO.ComfyNode): ], is_api_node=True, price_badge=IO.PriceBadge( - expr="""{"type":"usd","usd":0.14,"format":{"suffix":"/second"}}""", + expr="""{"type":"usd","usd":0.0042,"format":{"suffix":"/second"}}""", ), ) @@ -571,7 +574,7 @@ class BriaExtension(ComfyExtension): BriaRemoveImageBackground, BriaRemoveVideoBackground, BriaVideoGreenScreen, - # BriaVideoReplaceBackground, # server returns Status 500 when we pass background video + BriaVideoReplaceBackground, BriaTransparentVideoBackground, ] From 02656ea0bb75920621af9c9590de6db2a71a1a64 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Fri, 12 Jun 2026 07:51:12 -0700 Subject: [PATCH 15/52] Fix potential dtype issue with ideogram 4. 
(#14436) --- comfy/ldm/ideogram4/model.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/comfy/ldm/ideogram4/model.py b/comfy/ldm/ideogram4/model.py index b86c65bf0..4ea5b8aaf 100644 --- a/comfy/ldm/ideogram4/model.py +++ b/comfy/ldm/ideogram4/model.py @@ -106,11 +106,11 @@ class Ideogram4EmbedScalar(nn.Module): self.mlp_in = operations.Linear(dim, dim, bias=True, dtype=dtype, device=device) self.mlp_out = operations.Linear(dim, dim, bias=True, dtype=dtype, device=device) - def forward(self, x): + def forward(self, x, dtype): x = x.to(torch.float32) scaled = 1e4 * (x - self.range_min) / (self.range_max - self.range_min) emb = _sinusoidal_embedding(scaled, self.dim) - emb = emb.to(self.mlp_in.weight.dtype) + emb = emb.to(dtype) emb = F.silu(self.mlp_in(emb)) return self.mlp_out(emb) @@ -161,7 +161,7 @@ class Ideogram4Transformer(nn.Module): x = x * output_image_mask h = self.input_proj(x) * output_image_mask - t_cond = self.t_embedding(t) + t_cond = self.t_embedding(t, dtype=x.dtype) if t.dim() == 1: t_cond = t_cond.unsqueeze(1) adaln_input = F.silu(self.adaln_proj(t_cond)) From d7a552720cf92fb8a7e34a2165a144ff9056dbe1 Mon Sep 17 00:00:00 2001 From: rattus <46076784+rattus128@users.noreply.github.com> Date: Sat, 13 Jun 2026 00:53:33 +1000 Subject: [PATCH 16/52] add --high-ram option (#14437) Add this option for users who know they have so much ram they want to pin everything or have a pagefile that outruns their disk speed. This removes the RAM pressure caps completely and pins behind the primary model load forcing all models to be permanently committed to RAM. 
--- comfy/cli_args.py | 4 ++++ comfy/model_management.py | 4 ++++ comfy/ops.py | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/comfy/cli_args.py b/comfy/cli_args.py index 22f621cf5..e7ee0d5eb 100644 --- a/comfy/cli_args.py +++ b/comfy/cli_args.py @@ -115,6 +115,7 @@ cache_group.add_argument("--cache-ram", nargs='*', type=float, default=[], metav cache_group.add_argument("--cache-classic", action="store_true", help="Use the old style (aggressive) caching.") cache_group.add_argument("--cache-lru", type=int, default=0, help="Use LRU caching with a maximum of N node results cached. May use more RAM/VRAM.") cache_group.add_argument("--cache-none", action="store_true", help="Reduced RAM/VRAM usage at the expense of executing every node for each run.") +cache_group.add_argument("--high-ram", action="store_true", help="Can improve performance slightly on high RAM or on systems where pagefile use is preferred over model loading.") attn_group = parser.add_mutually_exclusive_group() attn_group.add_argument("--use-split-cross-attention", action="store_true", help="Use the split cross attention optimization. 
Ignored when xformers is used.") @@ -249,6 +250,9 @@ else: if args.cache_ram is not None and len(args.cache_ram) > 2: parser.error("--cache-ram accepts at most two values: active GB and inactive GB") +if args.high_ram: + args.cache_classic = True + if args.windows_standalone_build: args.auto_launch = True diff --git a/comfy/model_management.py b/comfy/model_management.py index 55ddaab8e..b15d08ba1 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -643,6 +643,8 @@ def free_pins(size, evict_active=False): return freed_total def ensure_pin_budget(size, evict_active=False): + if args.high_ram: + return True if args.fast_disk: shortfall = TOTAL_PINNED_MEMORY + size - MAX_PINNED_MEMORY else: @@ -1496,6 +1498,8 @@ if not args.disable_pinned_memory: PINNING_ALLOWED_TYPES = set(["Tensor", "Parameter", "QuantizedTensor"]) def pinned_hostbuf_size(size): + if args.high_ram: + return max(0, int(size * 2)) return max(0, int(min(size, MAX_PINNED_MEMORY) * 2)) def discard_cuda_async_error(): diff --git a/comfy/ops.py b/comfy/ops.py index 3c9912aae..3f088a962 100644 --- a/comfy/ops.py +++ b/comfy/ops.py @@ -180,7 +180,7 @@ def cast_modules_with_vbar(comfy_modules, dtype, device, bias_dtype, non_blockin if pin is not None: cast_maybe_lowvram_patch([pin], dest, offload_stream) return - if signature is None: + if signature is None or args.high_ram: comfy.pinned_memory.pin_memory(m, subset=subset, size=size) pin = comfy.pinned_memory.get_pin(m, subset=subset) cast_maybe_lowvram_patch(source, pin, offload_stream, xfer_dest2=dest) From 28a40fb2b2b30a6fcd45ff824cc6f1093e26ee90 Mon Sep 17 00:00:00 2001 From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com> Date: Fri, 12 Jun 2026 20:17:11 +0300 Subject: [PATCH 17/52] [Partner Nodes] feat: add Runway Aleph2 node (#14306) Signed-off-by: bigcat88 --- comfy_api_nodes/apis/runway.py | 152 +++++++++++++- comfy_api_nodes/nodes_runway.py | 359 ++++++++++++++++++++++++++++++-- 2 files changed, 481 insertions(+), 
30 deletions(-) diff --git a/comfy_api_nodes/apis/runway.py b/comfy_api_nodes/apis/runway.py index df6f2b845..6878aa6f0 100644 --- a/comfy_api_nodes/apis/runway.py +++ b/comfy_api_nodes/apis/runway.py @@ -67,15 +67,6 @@ class RunwayImageToVideoResponse(BaseModel): id: Optional[str] = Field(None, description='Task ID') -class RunwayTaskStatusEnum(str, Enum): - SUCCEEDED = 'SUCCEEDED' - RUNNING = 'RUNNING' - FAILED = 'FAILED' - PENDING = 'PENDING' - CANCELLED = 'CANCELLED' - THROTTLED = 'THROTTLED' - - class RunwayTaskStatusResponse(BaseModel): createdAt: datetime = Field(..., description='Task creation timestamp') id: str = Field(..., description='Task ID') @@ -86,7 +77,7 @@ class RunwayTaskStatusResponse(BaseModel): ge=0.0, le=1.0, ) - status: RunwayTaskStatusEnum + status: str = Field(..., description="SUCCEEDED, RUNNING, FAILED, PENDING, CANCELLED or THROTTLED") class Model4(str, Enum): @@ -125,3 +116,144 @@ class RunwayTextToImageRequest(BaseModel): class RunwayTextToImageResponse(BaseModel): id: Optional[str] = Field(None, description='Task ID') + + +class RunwayAleph2IO: + """Custom socket types for chaining Aleph2 guidance images.""" + + KEYFRAME = "RUNWAY_ALEPH2_KEYFRAME" + PROMPT_IMAGE = "RUNWAY_ALEPH2_PROMPT_IMAGE" + + +# Keyframe timing modes (anchored to the INPUT video). Stored on the chain item and used to +# choose the request model below. The values match the Aleph2 keyframe union field names. +KEYFRAME_MODE_SECONDS = "seconds" # absolute time, in seconds, from the start of the input video +KEYFRAME_MODE_AT = "at" # fraction [0.0, 1.0] of the input video duration + +# Prompt-image position modes (anchored to the OUTPUT video). Values match the Aleph2 position `type`. 
+PROMPT_IMAGE_MODE_TIMESTAMP = "timestamp" # absolute time, in seconds, from the start of the output video +PROMPT_IMAGE_MODE_POSITION = "position" # fraction [0.0, 1.0] of the output video duration + + +class RunwayAleph2KeyframeItem: + """A guidance image anchored to a point of the INPUT video (one Aleph2 ``keyframe``).""" + + def __init__(self, image, mode: str, value: float): + self.image = image + self.mode = mode # KEYFRAME_MODE_SECONDS | KEYFRAME_MODE_AT + self.value = value + + +class RunwayAleph2KeyframeChain: + """An ordered collection of keyframes, built by chaining Runway Aleph2 Keyframe nodes.""" + + def __init__(self): + self.items: list[RunwayAleph2KeyframeItem] = [] + + def add(self, item: RunwayAleph2KeyframeItem) -> None: + self.items.append(item) + + def clone(self) -> "RunwayAleph2KeyframeChain": + c = RunwayAleph2KeyframeChain() + c.items = list(self.items) + return c + + +class RunwayAleph2PromptImageItem: + """A guidance image anchored to a point of the OUTPUT video (one Aleph2 ``promptImage``).""" + + def __init__(self, image, mode: str, value: float): + self.image = image + self.mode = mode # PROMPT_IMAGE_MODE_TIMESTAMP | PROMPT_IMAGE_MODE_POSITION + self.value = value + + +class RunwayAleph2PromptImageChain: + """An ordered collection of prompt images, built by chaining Runway Aleph2 Prompt Image nodes.""" + + def __init__(self): + self.items: list[RunwayAleph2PromptImageItem] = [] + + def add(self, item: RunwayAleph2PromptImageItem) -> None: + self.items.append(item) + + def clone(self) -> "RunwayAleph2PromptImageChain": + c = RunwayAleph2PromptImageChain() + c.items = list(self.items) + return c + + +class RunwayAleph2KeyframeSeconds(BaseModel): + seconds: float = Field( + ..., + description="Absolute timestamp in seconds from the start of the input video when this guidance image should apply.", + ge=0.0, + ) + uri: str = Field(...) 
+ + +class RunwayAleph2KeyframeAt(BaseModel): + at: float = Field( + ..., + description="Position as a fraction [0.0, 1.0] of the input video duration.", + ge=0.0, + le=1.0, + ) + uri: str = Field(...) + + +class RunwayAleph2TimestampPosition(BaseModel): + type: str = Field(default="timestamp") + timestampSeconds: float = Field( + ..., + description="Absolute timestamp in seconds from the start of the output video.", + ge=0.0, + ) + + +class RunwayAleph2RelativePosition(BaseModel): + type: str = Field(default="position") + positionPercentage: float = Field( + ..., + description="Position as a fraction [0.0, 1.0] of the total output video duration.", + ge=0.0, + le=1.0, + ) + + +class RunwayAleph2PromptImage(BaseModel): + position: RunwayAleph2TimestampPosition | RunwayAleph2RelativePosition + uri: str = Field(...) + + +class RunwayAleph2ContentModeration(BaseModel): + publicFigureThreshold: str = Field( + ..., + description='When set to "low", the content moderation system is less strict about ' + 'recognizable public figures. One of "auto" or "low".', + ) + + +class RunwayAleph2Request(BaseModel): + model: str = Field(default="aleph2") + promptText: str = Field( + ..., + description="A non-empty string describing what should appear in the output.", + min_length=1, + max_length=1000, + ) + videoUri: str = Field(...) + seed: int = Field(..., description="Random seed for generation", ge=0, le=4294967295) + contentModeration: RunwayAleph2ContentModeration = Field(...) + keyframes: list[RunwayAleph2KeyframeSeconds | RunwayAleph2KeyframeAt] | None = Field( + None, + description="Timed guidance images placed at specific points in the input video. 
Up to 5.", + ) + promptImage: list[RunwayAleph2PromptImage] | None = Field( + None, + description="Up to 5 image keyframes for guiding the edit at specific points in the output video.", + ) + + +class RunwayAleph2Response(BaseModel): + id: str | None = Field(None, description="Task ID") diff --git a/comfy_api_nodes/nodes_runway.py b/comfy_api_nodes/nodes_runway.py index b9c5c81a1..013a193d9 100644 --- a/comfy_api_nodes/nodes_runway.py +++ b/comfy_api_nodes/nodes_runway.py @@ -30,13 +30,33 @@ from comfy_api_nodes.apis.runway import ( Model4, ReferenceImage, RunwayTextToImageAspectRatioEnum, + RunwayAleph2IO, + RunwayAleph2KeyframeChain, + RunwayAleph2KeyframeItem, + RunwayAleph2PromptImageChain, + RunwayAleph2PromptImageItem, + RunwayAleph2Request, + RunwayAleph2Response, + RunwayAleph2KeyframeSeconds, + RunwayAleph2KeyframeAt, + RunwayAleph2PromptImage, + RunwayAleph2TimestampPosition, + RunwayAleph2RelativePosition, + RunwayAleph2ContentModeration, + KEYFRAME_MODE_SECONDS, + KEYFRAME_MODE_AT, + PROMPT_IMAGE_MODE_TIMESTAMP, + PROMPT_IMAGE_MODE_POSITION, ) from comfy_api_nodes.util import ( image_tensor_pair_to_batch, validate_string, validate_image_dimensions, validate_image_aspect_ratio, + validate_video_duration, upload_images_to_comfyapi, + upload_image_to_comfyapi, + upload_video_to_comfyapi, download_url_to_video_output, download_url_to_image_tensor, ApiEndpoint, @@ -45,6 +65,7 @@ from comfy_api_nodes.util import ( ) PATH_IMAGE_TO_VIDEO = "/proxy/runway/image_to_video" +PATH_VIDEO_TO_VIDEO = "/proxy/runway/video_to_video" PATH_TEXT_TO_IMAGE = "/proxy/runway/text_to_image" PATH_GET_TASK_STATUS = "/proxy/runway/tasks" @@ -53,12 +74,6 @@ AVERAGE_DURATION_FLF_SECONDS = 256 AVERAGE_DURATION_T2I_SECONDS = 41 -class RunwayApiError(Exception): - """Base exception for Runway API errors.""" - - pass - - class RunwayGen4TurboAspectRatio(str, Enum): """Aspect ratios supported for Image to Video API when using gen4_turbo model.""" @@ -84,14 +99,6 @@ def 
get_video_url_from_task_status(response: TaskStatusResponse) -> str | None: return None -def extract_progress_from_task_status( - response: TaskStatusResponse, -) -> float | None: - if hasattr(response, "progress") and response.progress is not None: - return response.progress * 100 - return None - - def get_image_url_from_task_status(response: TaskStatusResponse) -> str | None: """Returns the image URL from the task status response if it exists.""" if hasattr(response, "output") and len(response.output) > 0: @@ -102,14 +109,13 @@ def get_image_url_from_task_status(response: TaskStatusResponse) -> str | None: async def get_response( cls: type[IO.ComfyNode], task_id: str, estimated_duration: int | None = None ) -> TaskStatusResponse: - """Poll the task status until it is finished then get the response.""" return await poll_op( cls, ApiEndpoint(path=f"{PATH_GET_TASK_STATUS}/{task_id}"), response_model=TaskStatusResponse, - status_extractor=lambda r: r.status.value, + status_extractor=lambda r: r.status, estimated_duration=estimated_duration, - progress_extractor=extract_progress_from_task_status, + progress_extractor=lambda r: r.progress * 100 if r.progress is not None else None, ) @@ -127,7 +133,7 @@ async def generate_video( final_response = await get_response(cls, initial_response.id, estimated_duration) if not final_response.output: - raise RunwayApiError("Runway task succeeded but no video data found in response.") + raise ValueError("Runway task succeeded but no video data found in response.") video_url = get_video_url_from_task_status(final_response) return await download_url_to_video_output(video_url) @@ -410,7 +416,7 @@ class RunwayFirstLastFrameNode(IO.ComfyNode): mime_type="image/png", ) if len(download_urls) != 2: - raise RunwayApiError("Failed to upload one or more images to comfy api.") + raise ValueError("Failed to upload one or more images to comfy api.") return IO.NodeOutput( await generate_video( @@ -514,11 +520,321 @@ class 
RunwayTextToImageNode(IO.ComfyNode): estimated_duration=AVERAGE_DURATION_T2I_SECONDS, ) if not final_response.output: - raise RunwayApiError("Runway task succeeded but no image data found in response.") + raise ValueError("Runway task succeeded but no image data found in response.") return IO.NodeOutput(await download_url_to_image_tensor(get_image_url_from_task_status(final_response))) +_TIMING_ABSOLUTE = "Absolute time (seconds)" +_TIMING_FRACTION = "Fraction of duration (0.0-1.0)" + + +class RunwayAleph2KeyframeNode(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="RunwayAleph2KeyframeNode", + display_name="Runway Aleph2 Keyframe", + category="partner/video/Runway", + description="Anchor a guidance image to a moment of the input (source) video, so Aleph2 " + "steers the edit at that point of your footage. Connect this to the 'keyframes' input of " + "the Runway Aleph2 Video to Video node; chain several together (up to 5) via the optional " + "'keyframes' input below.", + inputs=[ + IO.Image.Input( + "image", + tooltip="The guidance image to apply at the chosen moment of the input video.", + ), + IO.DynamicCombo.Input( + "timing", + options=[ + IO.DynamicCombo.Option( + _TIMING_ABSOLUTE, + [ + IO.Float.Input( + "seconds", + default=0.0, + min=0.0, + max=30.0, + step=0.1, + display_mode=IO.NumberDisplay.number, + tooltip="Time in seconds from start of the input video where this image applies.", + ), + ], + ), + IO.DynamicCombo.Option( + _TIMING_FRACTION, + [ + IO.Float.Input( + "fraction", + default=0.0, + min=0.0, + max=1.0, + step=0.01, + display_mode=IO.NumberDisplay.number, + tooltip="Where in the input video this image applies, " + "as a fraction of its duration (0.0 = start, 1.0 = end).", + ), + ], + ), + ], + tooltip="How to place this image on the input video's timeline.", + ), + IO.Custom(RunwayAleph2IO.KEYFRAME).Input( + "keyframes", + optional=True, + tooltip="Optional earlier keyframes to chain with this one.", + 
), + ], + outputs=[IO.Custom(RunwayAleph2IO.KEYFRAME).Output(display_name="keyframes")], + ) + + @classmethod + def execute( + cls, + image: Input.Image, + timing: dict, + keyframes: RunwayAleph2KeyframeChain | None = None, + ) -> IO.NodeOutput: + chain = keyframes.clone() if keyframes is not None else RunwayAleph2KeyframeChain() + if timing["timing"] == _TIMING_ABSOLUTE: + mode, value = KEYFRAME_MODE_SECONDS, float(timing["seconds"]) + else: + mode, value = KEYFRAME_MODE_AT, float(timing["fraction"]) + chain.add(RunwayAleph2KeyframeItem(image=image, mode=mode, value=value)) + return IO.NodeOutput(chain) + + +class RunwayAleph2PromptImageNode(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="RunwayAleph2PromptImageNode", + display_name="Runway Aleph2 Prompt Image", + category="partner/video/Runway", + description="Anchor a guidance image to a moment of the output (result) video, to guide what " + "the edited video looks like at that point. Connect this to the 'prompt_images' input of the " + "Runway Aleph2 Video to Video node; chain several together (up to 5) via the optional " + "'prompt_images' input below.", + inputs=[ + IO.Image.Input( + "image", + tooltip="The guidance image to place at the chosen moment of the output video.", + ), + IO.DynamicCombo.Input( + "position", + options=[ + IO.DynamicCombo.Option( + _TIMING_ABSOLUTE, + [ + IO.Float.Input( + "seconds", + default=0.0, + min=0.0, + max=30.0, + step=0.1, + display_mode=IO.NumberDisplay.number, + tooltip="Time in seconds from start of the output video where this image applies.", + ), + ], + ), + IO.DynamicCombo.Option( + _TIMING_FRACTION, + [ + IO.Float.Input( + "fraction", + default=0.0, + min=0.0, + max=1.0, + step=0.01, + display_mode=IO.NumberDisplay.number, + tooltip="Where in the output video this image applies, " + "as a fraction of its duration (0.0 = start, 1.0 = end).", + ), + ], + ), + ], + tooltip="How to place this image on the output video's timeline.", 
+ ), + IO.Custom(RunwayAleph2IO.PROMPT_IMAGE).Input( + "prompt_images", + optional=True, + tooltip="Optional earlier prompt images to chain with this one.", + ), + ], + outputs=[IO.Custom(RunwayAleph2IO.PROMPT_IMAGE).Output(display_name="prompt_images")], + ) + + @classmethod + def execute( + cls, + image: Input.Image, + position: dict, + prompt_images: RunwayAleph2PromptImageChain | None = None, + ) -> IO.NodeOutput: + chain = prompt_images.clone() if prompt_images is not None else RunwayAleph2PromptImageChain() + if position["position"] == _TIMING_ABSOLUTE: + mode, value = PROMPT_IMAGE_MODE_TIMESTAMP, float(position["seconds"]) + else: + mode, value = PROMPT_IMAGE_MODE_POSITION, float(position["fraction"]) + chain.add(RunwayAleph2PromptImageItem(image=image, mode=mode, value=value)) + return IO.NodeOutput(chain) + + +class RunwayAleph2VideoToVideoNode(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="RunwayAleph2VideoToVideoNode", + display_name="Runway Aleph2 Video to Video", + category="partner/video/Runway", + description="Edit a video with a text prompt using Runway's Aleph2 model. Aleph2 transforms " + "your footage (restyle, relight, add or remove elements, change the viewpoint) while keeping " + "the original motion and timing; the output resolution matches the input video, which must be " + "2-30 seconds at 30 fps or lower. Optionally steer the edit with either keyframes (anchored to " + "the input video) or prompt images (anchored to the output video) - use one or the other, not both.", + inputs=[ + IO.String.Input( + "prompt", + multiline=True, + default="", + tooltip="Describes what should appear in the output (1-1000 characters).", + ), + IO.Video.Input( + "video", + tooltip="Input video to edit. 
Must be 2-30 seconds at 30 fps or lower.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=4294967295, + step=1, + control_after_generate=True, + display_mode=IO.NumberDisplay.number, + tooltip="Random seed for generation", + ), + IO.Combo.Input( + "public_figure_threshold", + options=["auto", "low"], + default="low", + tooltip="Content moderation for recognizable public figures.", + ), + IO.Custom(RunwayAleph2IO.KEYFRAME).Input( + "keyframes", + optional=True, + tooltip="Guidance images anchored to the input video, from Aleph2 Keyframe nodes (up to 5). " + "Use keyframes or prompt images, not both.", + ), + IO.Custom(RunwayAleph2IO.PROMPT_IMAGE).Input( + "prompt_images", + optional=True, + tooltip="Guidance images anchored to the output video, from Aleph2 Prompt Image nodes (up to 5). " + "Use keyframes or prompt images, not both.", + ), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd": 0.4004, "format":{"suffix":"/second"}}""", + ), + ) + + @classmethod + async def execute( + cls, + prompt: str, + video: Input.Video, + seed: int, + public_figure_threshold: str = "low", + keyframes: RunwayAleph2KeyframeChain | None = None, + prompt_images: RunwayAleph2PromptImageChain | None = None, + ) -> IO.NodeOutput: + validate_string(prompt, min_length=1, max_length=1000) + validate_video_duration( + video, + min_duration=2.0, + max_duration=30.0, + ) + try: + fps = float(video.get_frame_rate()) + except Exception: + fps = None + if fps is not None and fps > 30.0 + 0.01: + raise ValueError(f"Input video frame rate ({fps:.2f} fps) exceeds Aleph2's maximum of 30 fps.") + + if (keyframes and keyframes.items) and (prompt_images and prompt_images.items): + raise ValueError("Aleph2 accepts either keyframes or prompt images, not both.") + + video_duration: float | None = None + try: + 
video_duration = video.get_duration() + except Exception: + video_duration = None + + def _check_seconds(value: float, label: str) -> None: + if video_duration is not None and value > video_duration + 0.0001: + raise ValueError(f"{label} {value:.2f}s exceeds the input video duration ({video_duration:.2f}s).") + + video_url = await upload_video_to_comfyapi(cls, video) + + keyframe_models: list[RunwayAleph2KeyframeSeconds | RunwayAleph2KeyframeAt] = [] + if keyframes is not None: + if len(keyframes.items) > 5: + raise ValueError("Aleph2 supports at most 5 keyframes.") + for item in keyframes.items: + image_url = await upload_image_to_comfyapi(cls, item.image, mime_type="image/png") + if item.mode == KEYFRAME_MODE_SECONDS: + _check_seconds(item.value, "Keyframe timestamp") + keyframe_models.append(RunwayAleph2KeyframeSeconds(seconds=item.value, uri=image_url)) + else: + keyframe_models.append(RunwayAleph2KeyframeAt(at=item.value, uri=image_url)) + + prompt_image_models: list[RunwayAleph2PromptImage] = [] + if prompt_images is not None: + if len(prompt_images.items) > 5: + raise ValueError("Aleph2 supports at most 5 prompt images.") + for item in prompt_images.items: + image_url = await upload_image_to_comfyapi(cls, item.image, mime_type="image/png") + position: RunwayAleph2TimestampPosition | RunwayAleph2RelativePosition + if item.mode == PROMPT_IMAGE_MODE_TIMESTAMP: + _check_seconds(item.value, "Prompt image timestamp") + position = RunwayAleph2TimestampPosition(timestampSeconds=item.value) + else: + position = RunwayAleph2RelativePosition(positionPercentage=item.value) + prompt_image_models.append(RunwayAleph2PromptImage(position=position, uri=image_url)) + + initial_response = await sync_op( + cls, + endpoint=ApiEndpoint(path=PATH_VIDEO_TO_VIDEO, method="POST"), + response_model=RunwayAleph2Response, + data=RunwayAleph2Request( + promptText=prompt, + videoUri=video_url, + seed=seed, + 
contentModeration=RunwayAleph2ContentModeration(publicFigureThreshold=public_figure_threshold), + keyframes=keyframe_models or None, + promptImage=prompt_image_models or None, + ), + ) + + final_response = await get_response(cls, initial_response.id) + if not final_response.output: + raise ValueError("Runway task succeeded but no video data found in response.") + + return IO.NodeOutput(await download_url_to_video_output(get_video_url_from_task_status(final_response))) + + class RunwayExtension(ComfyExtension): @override async def get_node_list(self) -> list[type[IO.ComfyNode]]: @@ -527,6 +843,9 @@ class RunwayExtension(ComfyExtension): RunwayImageToVideoNodeGen3a, RunwayImageToVideoNodeGen4, RunwayTextToImageNode, + RunwayAleph2VideoToVideoNode, + RunwayAleph2KeyframeNode, + RunwayAleph2PromptImageNode, ] From 7277d99d3ab9f4ced8db4e87c82bad68028aa80d Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Fri, 12 Jun 2026 18:38:39 -0700 Subject: [PATCH 18/52] Use comfy kitchen apply rope in omnigen2 model. 
(#14442) --- comfy/ldm/omnigen/omnigen2.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/comfy/ldm/omnigen/omnigen2.py b/comfy/ldm/omnigen/omnigen2.py index 82edc92da..e9ca5229d 100644 --- a/comfy/ldm/omnigen/omnigen2.py +++ b/comfy/ldm/omnigen/omnigen2.py @@ -8,6 +8,7 @@ import torch.nn.functional as F from einops import rearrange, repeat from comfy.ldm.lightricks.model import Timesteps from comfy.ldm.flux.layers import EmbedND +from comfy.ldm.flux.math import apply_rope1 from comfy.ldm.modules.attention import optimized_attention_masked import comfy.model_management import comfy.ldm.common_dit @@ -17,9 +18,7 @@ def apply_rotary_emb(x, freqs_cis): if x.shape[1] == 0: return x - t_ = x.reshape(*x.shape[:-1], -1, 1, 2) - t_out = freqs_cis[..., 0] * t_[..., 0] + freqs_cis[..., 1] * t_[..., 1] - return t_out.reshape(*x.shape).to(dtype=x.dtype) + return apply_rope1(x, freqs_cis) def swiglu(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: From fe54b5e955edf60ecbbde627712bea8dece7167a Mon Sep 17 00:00:00 2001 From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com> Date: Sat, 13 Jun 2026 16:05:25 +0300 Subject: [PATCH 19/52] Add 10-bit video support (#14452) Create Video gets a bit_depth option (8-bit/10-bit); the selected depth is carried by the video and applied when it gets encoded. Save Video and Video Slice now keep the source bit depth instead of always quantizing to 8-bit, so 10-bit videos stay 10-bit. 10-bit uses h264 with the yuv420p10le pixel format, so there's no new codec or container. 
Signed-off-by: bigcat88 --- comfy_api/latest/_input/video_types.py | 13 ++- comfy_api/latest/_input_impl/video_types.py | 50 ++++++++-- comfy_extras/nodes_video.py | 25 ++++- .../comfy_api_test/video_bit_depth_test.py | 93 +++++++++++++++++++ 4 files changed, 169 insertions(+), 12 deletions(-) create mode 100644 tests-unit/comfy_api_test/video_bit_depth_test.py diff --git a/comfy_api/latest/_input/video_types.py b/comfy_api/latest/_input/video_types.py index 8fff52c16..e2e99521f 100644 --- a/comfy_api/latest/_input/video_types.py +++ b/comfy_api/latest/_input/video_types.py @@ -27,10 +27,13 @@ class VideoInput(ABC): path: Union[str, IO[bytes]], format: VideoContainer = VideoContainer.AUTO, codec: VideoCodec = VideoCodec.AUTO, - metadata: Optional[dict] = None + metadata: Optional[dict] = None, + bit_depth: int | None = None, ): """ Abstract method to save the video input to a file. + + bit_depth selects the encoded bit depth; None keeps the video's native depth. """ pass @@ -83,6 +86,14 @@ class VideoInput(ABC): components = self.get_components() return components.images.shape[2], components.images.shape[1] + def get_bit_depth(self) -> int: + """ + Returns the bit depth of the video (e.g. 8 or 10). + + Default implementation returns 8; subclasses report their real depth. + """ + return 8 + def get_duration(self) -> float: """ Returns the duration of the video in seconds. diff --git a/comfy_api/latest/_input_impl/video_types.py b/comfy_api/latest/_input_impl/video_types.py index 4a12ff9c1..dfdf58515 100644 --- a/comfy_api/latest/_input_impl/video_types.py +++ b/comfy_api/latest/_input_impl/video_types.py @@ -52,6 +52,12 @@ def get_open_write_kwargs( return open_kwargs +def video_stream_bit_depth(stream) -> int: + if stream is None or stream.format is None or not stream.format.components: + return 8 + return max(component.bits for component in stream.format.components) + + class VideoFromFile(VideoInput): """ Class representing video input from a file. 
@@ -97,6 +103,13 @@ class VideoFromFile(VideoInput): return stream.width, stream.height raise ValueError(f"No video stream found in file '{self.__file}'") + def get_bit_depth(self) -> int: + if isinstance(self.__file, io.BytesIO): + self.__file.seek(0) # Reset the BytesIO object to the beginning + with av.open(self.__file, mode="r") as container: + video_stream = container.streams.video[0] if len(container.streams.video) > 0 else None + return video_stream_bit_depth(video_stream) + def get_duration(self) -> float: """ Returns the duration of the video in seconds. @@ -377,25 +390,32 @@ class VideoFromFile(VideoInput): format: VideoContainer = VideoContainer.AUTO, codec: VideoCodec = VideoCodec.AUTO, metadata: Optional[dict] = None, + bit_depth: int | None = None, ): if isinstance(self.__file, io.BytesIO): self.__file.seek(0) # Reset the BytesIO object to the beginning with av.open(self.__file, mode='r') as container: container_format = container.format.name - video_encoding = container.streams.video[0].codec.name if len(container.streams.video) > 0 else None + video_stream = container.streams.video[0] if len(container.streams.video) > 0 else None + video_encoding = video_stream.codec.name if video_stream is not None else None + source_bit_depth = video_stream_bit_depth(video_stream) reuse_streams = True if format != VideoContainer.AUTO and format not in container_format.split(","): reuse_streams = False if codec != VideoCodec.AUTO and codec != video_encoding and video_encoding is not None: reuse_streams = False + if bit_depth is not None and video_encoding is not None and bit_depth != source_bit_depth: + reuse_streams = False if self.__start_time or self.__duration: reuse_streams = False if not reuse_streams: + if bit_depth is None: + bit_depth = source_bit_depth components = self.get_components_internal(container) video = VideoFromComponents(components) return video.save_to( - path, format=format, codec=codec, metadata=metadata + path, format=format, codec=codec, 
metadata=metadata, bit_depth=bit_depth, ) streams = container.streams @@ -451,8 +471,10 @@ class VideoFromComponents(VideoInput): Class representing video input from tensors. """ - def __init__(self, components: VideoComponents): + def __init__(self, components: VideoComponents, bit_depth: int = 8): self.__components = components + # Tensor components have no inherent bit depth; this is the depth used when encoding. + self.__bit_depth = bit_depth def get_components(self) -> VideoComponents: return VideoComponents( @@ -461,18 +483,26 @@ class VideoFromComponents(VideoInput): frame_rate=self.__components.frame_rate, ) + def get_bit_depth(self) -> int: + return self.__bit_depth + def save_to( self, path: str, format: VideoContainer = VideoContainer.AUTO, codec: VideoCodec = VideoCodec.AUTO, metadata: Optional[dict] = None, + bit_depth: int | None = None, ): """Save the video to a file path or BytesIO buffer.""" if format != VideoContainer.AUTO and format != VideoContainer.MP4: raise ValueError("Only MP4 format is supported for now") if codec != VideoCodec.AUTO and codec != VideoCodec.H264: raise ValueError("Only H264 codec is supported for now") + # None means "use the depth this video was created with" (CreateVideo's choice). 
+ if bit_depth is None: + bit_depth = self.__bit_depth + is_10bit = bit_depth >= 10 extra_kwargs = {} if isinstance(format, VideoContainer) and format != VideoContainer.AUTO: extra_kwargs["format"] = format.value @@ -488,10 +518,11 @@ class VideoFromComponents(VideoInput): frame_rate = Fraction(round(self.__components.frame_rate * 1000), 1000) # Create a video stream + pix_fmt = "yuv420p10le" if is_10bit else "yuv420p" video_stream = output.add_stream('h264', rate=frame_rate) video_stream.width = self.__components.images.shape[2] video_stream.height = self.__components.images.shape[1] - video_stream.pix_fmt = 'yuv420p' + video_stream.pix_fmt = pix_fmt # Create an audio stream audio_sample_rate = 1 @@ -505,9 +536,14 @@ class VideoFromComponents(VideoInput): # Encode video for i, frame in enumerate(self.__components.images): - img = (frame * 255).clamp(0, 255).byte().cpu().numpy() # shape: (H, W, 3) - frame = av.VideoFrame.from_ndarray(img, format='rgb24') - frame = frame.reformat(format='yuv420p') # Convert to YUV420P as required by h264 + if is_10bit: + # 16-bit RGB keeps float precision through the conversion to 10-bit YUV. 
+ img = (frame.float() * 65535).clamp(0, 65535).cpu().numpy().astype(np.uint16) # shape: (H, W, 3) + frame = av.VideoFrame.from_ndarray(img, format="rgb48le") + else: + img = (frame * 255).clamp(0, 255).byte().cpu().numpy() # shape: (H, W, 3) + frame = av.VideoFrame.from_ndarray(img, format='rgb24') + frame = frame.reformat(format=pix_fmt) packet = video_stream.encode(frame) output.mux(packet) diff --git a/comfy_extras/nodes_video.py b/comfy_extras/nodes_video.py index 6f6c416a6..050a897dd 100644 --- a/comfy_extras/nodes_video.py +++ b/comfy_extras/nodes_video.py @@ -134,6 +134,17 @@ class CreateVideo(io.ComfyNode): io.Image.Input("images", tooltip="The images to create a video from."), io.Float.Input("fps", default=30.0, min=1.0, max=120.0, step=1.0), io.Audio.Input("audio", optional=True, tooltip="The audio to add to the video."), + io.Int.Input( + "bit_depth", + min=8, + max=10, + default=8, + step=2, + tooltip="Bit depth of the created video. 10-bit keeps smoother gradients with less" + " banding, but some players and downstream nodes may not support it.", + optional=True, + display_mode=io.NumberDisplay.number, + ), ], outputs=[ io.Video.Output(), @@ -141,9 +152,14 @@ class CreateVideo(io.ComfyNode): ) @classmethod - def execute(cls, images: Input.Image, fps: float, audio: Optional[Input.Audio] = None) -> io.NodeOutput: + def execute( + cls, images: Input.Image, fps: float, audio: Optional[Input.Audio] = None, bit_depth: int = 8, + ) -> io.NodeOutput: return io.NodeOutput( - InputImpl.VideoFromComponents(Types.VideoComponents(images=images, audio=audio, frame_rate=Fraction(fps))) + InputImpl.VideoFromComponents( + Types.VideoComponents(images=images, audio=audio, frame_rate=Fraction(fps)), + bit_depth=bit_depth, + ) ) class GetVideoComponents(io.ComfyNode): @@ -154,7 +170,7 @@ class GetVideoComponents(io.ComfyNode): search_aliases=["extract frames", "split video", "video to images", "demux"], display_name="Get Video Components", category="video", - 
description="Extracts all components from a video: frames, audio, and framerate.", + description="Extracts all components from a video: frames, audio, framerate, and bit depth.", inputs=[ io.Video.Input("video", tooltip="The video to extract components from."), ], @@ -162,13 +178,14 @@ class GetVideoComponents(io.ComfyNode): io.Image.Output(display_name="images"), io.Audio.Output(display_name="audio"), io.Float.Output(display_name="fps"), + io.Int.Output(display_name="bit_depth"), ], ) @classmethod def execute(cls, video: Input.Video) -> io.NodeOutput: components = video.get_components() - return io.NodeOutput(components.images, components.audio, float(components.frame_rate)) + return io.NodeOutput(components.images, components.audio, float(components.frame_rate), video.get_bit_depth()) class LoadVideo(io.ComfyNode): diff --git a/tests-unit/comfy_api_test/video_bit_depth_test.py b/tests-unit/comfy_api_test/video_bit_depth_test.py new file mode 100644 index 000000000..6c7bc9163 --- /dev/null +++ b/tests-unit/comfy_api_test/video_bit_depth_test.py @@ -0,0 +1,93 @@ +import pytest +import torch +import av +import numpy as np +from fractions import Fraction +from comfy_api.latest._input_impl.video_types import VideoFromFile, VideoFromComponents +from comfy_api.latest._util.video_types import VideoComponents + + +@pytest.fixture(scope="module") +def gradient_components(): + """Narrow horizontal ramp (0.25..0.30) that needs more than 8 bits to stay smooth""" + width, height, frames = 64, 64, 3 + ramp = torch.linspace(0.25, 0.30, width).view(1, 1, width, 1).expand(frames, height, width, 3) + return VideoComponents(images=ramp.contiguous(), frame_rate=Fraction(30)) + + +@pytest.fixture(scope="module") +def src8(gradient_components, tmp_path_factory): + """8-bit h264 mp4 (Create Video default)""" + path = str(tmp_path_factory.mktemp("video") / "src8.mp4") + VideoFromComponents(gradient_components).save_to(path) + return path + + +@pytest.fixture(scope="module") +def 
src10(gradient_components, tmp_path_factory): + """10-bit h264 mp4 (Create Video with bit_depth=10)""" + path = str(tmp_path_factory.mktemp("video") / "src10.mp4") + VideoFromComponents(gradient_components, bit_depth=10).save_to(path) + return path + + +def probe(path): + """(codec, pix_fmt, bit_depth) of the first video stream""" + with av.open(path) as container: + stream = container.streams.video[0] + return (stream.codec.name, stream.format.name, max(c.bits for c in stream.format.components)) + + +def decoded_levels(path): + """Unique tonal levels in the first decoded frame (banding measure)""" + with av.open(path) as container: + frame = next(container.decode(container.streams.video[0])) + return len(np.unique(frame.to_ndarray(format="gbrpf32le")[..., 0])) + + +def video_packet_bytes(path): + """Raw video packet payloads; identical to the source's only for a true remux""" + with av.open(path) as container: + return [bytes(p) for p in container.demux(container.streams.video[0]) if p.size] + + +def test_create_video_bit_depth(src8, src10): + """Create Video's bit_depth picks the encoded depth (default 8-bit); 10-bit reduces banding""" + assert probe(src8) == ("h264", "yuv420p", 8) + assert probe(src10) == ("h264", "yuv420p10le", 10) + assert decoded_levels(src10) > 2 * decoded_levels(src8) + + +def test_save_auto_keeps_source_depth(src8, src10, tmp_path): + """Save Video (no bit_depth = auto) stream-copies the source, preserving its depth byte-for-byte""" + for name, src in [("p8", src8), ("p10", src10)]: + path = str(tmp_path / f"{name}.mp4") + VideoFromFile(src).save_to(path) + assert probe(path) == probe(src) + assert video_packet_bytes(path) == video_packet_bytes(src) + + +def test_save_explicit_depth_reencodes(src8, src10, tmp_path): + """An explicit bit_depth different from the source forces a re-encode to that depth""" + down = str(tmp_path / "down8.mp4") + VideoFromFile(src10).save_to(down, bit_depth=8) + assert probe(down) == ("h264", "yuv420p", 8) + + 
up = str(tmp_path / "up10.mp4") + VideoFromFile(src8).save_to(up, bit_depth=10) + assert probe(up) == ("h264", "yuv420p10le", 10) + + +def test_trim_keeps_source_depth(src10, tmp_path): + """Video Slice re-encodes (trim) but preserves the source's 10-bit depth""" + path = str(tmp_path / "trim.mp4") + VideoFromFile(src10).as_trimmed(start_time=0, duration=1 / 30, strict_duration=False).save_to(path) + assert probe(path) == ("h264", "yuv420p10le", 10) + + +def test_get_bit_depth(gradient_components, src8, src10): + """get_bit_depth reports a video's depth (backs the Get Video Components output)""" + assert VideoFromFile(src8).get_bit_depth() == 8 + assert VideoFromFile(src10).get_bit_depth() == 10 + assert VideoFromComponents(gradient_components, bit_depth=10).get_bit_depth() == 10 + assert VideoFromComponents(gradient_components).get_bit_depth() == 8 From b664349ae72cf2fe5e812761421b6de5a987c409 Mon Sep 17 00:00:00 2001 From: Robin Huang Date: Sat, 13 Jun 2026 07:15:49 -0700 Subject: [PATCH 20/52] Expose deploy_environment in /system_stats (#14402) --- server.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/server.py b/server.py index ccc92e5ab..6b0029adf 100644 --- a/server.py +++ b/server.py @@ -27,6 +27,7 @@ import logging import mimetypes from comfy.cli_args import args +from comfy.deploy_environment import get_deploy_environment import comfy.utils import comfy.model_management from comfy_api import feature_flags @@ -690,6 +691,7 @@ class PromptServer(): "python_version": sys.version, "pytorch_version": comfy.model_management.torch_version, "embedded_python": os.path.split(os.path.split(sys.executable)[0])[1] == "python_embeded", + "deploy_environment": get_deploy_environment(), "argv": sys.argv }, "devices": device_entries From 740d347279c3a1697e54ebede31e5d6ac4831c18 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Sat, 13 Jun 2026 12:47:04 -0700 Subject: [PATCH 21/52] Remove the comfy python path 
append. --- nodes.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/nodes.py b/nodes.py index 0d422d418..916fa0ccc 100644 --- a/nodes.py +++ b/nodes.py @@ -20,8 +20,6 @@ from PIL.PngImagePlugin import PngInfo import numpy as np import safetensors.torch -sys.path.insert(0, os.path.join(os.path.dirname(os.path.realpath(__file__)), "comfy")) - import comfy.diffusers_load import comfy.samplers import comfy.sample From 64cc0780691ae9d8e2b7284cd930254631017274 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Sat, 13 Jun 2026 12:50:31 -0700 Subject: [PATCH 22/52] Revert last commit. Last time I use this stupid GitHub app. --- nodes.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nodes.py b/nodes.py index 916fa0ccc..0d422d418 100644 --- a/nodes.py +++ b/nodes.py @@ -20,6 +20,8 @@ from PIL.PngImagePlugin import PngInfo import numpy as np import safetensors.torch +sys.path.insert(0, os.path.join(os.path.dirname(os.path.realpath(__file__)), "comfy")) + import comfy.diffusers_load import comfy.samplers import comfy.sample From a1d95f3f8266ea5aef6f8784a5bbc016b143de70 Mon Sep 17 00:00:00 2001 From: John Pollock Date: Sat, 13 Jun 2026 19:58:48 -0500 Subject: [PATCH 23/52] Fix nondeterministic video decode at unaligned widths (CORE-299) (#14438) --- comfy_api/latest/_input_impl/video_types.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/comfy_api/latest/_input_impl/video_types.py b/comfy_api/latest/_input_impl/video_types.py index dfdf58515..92a1298c0 100644 --- a/comfy_api/latest/_input_impl/video_types.py +++ b/comfy_api/latest/_input_impl/video_types.py @@ -270,6 +270,7 @@ class VideoFromFile(VideoInput): image_format = 'gbrpf32le' process_image_format = lambda a: a + align_graph = None audio = None streams = [video_stream] @@ -323,7 +324,24 @@ class VideoFromFile(VideoInput): checked_alpha = True - img = frame.to_ndarray(format=image_format) # shape: (H, W, 4) + # Fix 
non-deterministic video decode when the video width is not a multiple of 32 + # For non-yuvj pixel formats (all H.264/H.265 video) + if image_format in ('gbrpf32le', 'gbrapf32le') and frame.width % 32 != 0: + if align_graph is None: + pad_w = ((frame.width + 31) // 32) * 32 + g = av.filter.Graph() + g_src = g.add_buffer(width=frame.width, height=frame.height, + format=frame.format.name, time_base=video_stream.time_base) + g_pad = g.add('pad', f'{pad_w}:{frame.height}:0:0') + g_sink = g.add('buffersink') + g_src.link_to(g_pad) + g_pad.link_to(g_sink) + g.configure() + align_graph = (g, g_src, g_sink) + align_graph[1].push(frame) + img = np.ascontiguousarray(align_graph[2].pull().to_ndarray(format=image_format)[:, :frame.width]) + else: + img = frame.to_ndarray(format=image_format) if frame.rotation != 0: k = int(round(frame.rotation // 90)) img = np.rot90(img, k=k, axes=(0, 1)).copy() From 5897d0c3aecb969aaf36f2d31c8a14f3eee5df58 Mon Sep 17 00:00:00 2001 From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com> Date: Sun, 14 Jun 2026 17:19:20 +0300 Subject: [PATCH 24/52] [Partner Nodes] feat(Tripo3d): add new "Import 3D" node (#14466) Signed-off-by: bigcat88 --- comfy_api_nodes/apis/tripo.py | 20 +++++++ comfy_api_nodes/nodes_tripo.py | 100 ++++++++++++++++++++++++++++++++- 2 files changed, 119 insertions(+), 1 deletion(-) diff --git a/comfy_api_nodes/apis/tripo.py b/comfy_api_nodes/apis/tripo.py index 7ac81d42c..79913997a 100644 --- a/comfy_api_nodes/apis/tripo.py +++ b/comfy_api_nodes/apis/tripo.py @@ -208,6 +208,10 @@ class TripoMultiviewToModelRequest(BaseModel): quad: bool | None = Field(False, description="Whether to apply quad to the generated model") +class TripoTexturePrompt(BaseModel): + text: str | None = Field(None, description="Text guidance for texture generation") + + class TripoTextureModelRequest(BaseModel): type: TripoTaskType = Field(TripoTaskType.TEXTURE_MODEL, description="Type of task") original_model_task_id: str = Field(..., 
description="The task ID of the original model") @@ -219,6 +223,11 @@ class TripoTextureModelRequest(BaseModel): texture_alignment: TripoTextureAlignment | None = Field( TripoTextureAlignment.ORIGINAL_IMAGE, description="The texture alignment method" ) + texture_prompt: TripoTexturePrompt | None = Field( + None, + description="Optional guidance for texturing. Required in practice for imported models, " + "which carry no source image to infer texture from.", + ) class TripoRefineModelRequest(BaseModel): @@ -307,6 +316,17 @@ class TripoP1MultiviewToModelRequest(TripoP1CommonRequest): orientation: str | None = None +class TripoImportModelRequest(BaseModel): + """Request for the comfy-api composite import endpoint (/proxy/tripo/v2/openapi/import). + + The model file is uploaded to ComfyUI API storage first; the backend downloads it from + `url`, re-uploads it to Tripo's storage and creates the import_model task server-side. + """ + + url: str = Field(..., description="ComfyUI API storage download URL of the model file") + format: str = Field(..., description='File format: "glb", "fbx", "obj" or "stl"') + + class TripoTaskOutput(BaseModel): model: str | None = Field(None, description="URL to the model") base_model: str | None = Field(None, description="URL to the base model") diff --git a/comfy_api_nodes/nodes_tripo.py b/comfy_api_nodes/nodes_tripo.py index a3f2cb053..228fe8a1d 100644 --- a/comfy_api_nodes/nodes_tripo.py +++ b/comfy_api_nodes/nodes_tripo.py @@ -1,6 +1,6 @@ from typing_extensions import override -from comfy_api.latest import IO, ComfyExtension, Input +from comfy_api.latest import IO, ComfyExtension, Input, Types from comfy_api_nodes.apis.tripo import ( TripoAnimateRetargetRequest, TripoAnimateRigRequest, @@ -8,6 +8,7 @@ from comfy_api_nodes.apis.tripo import ( TripoFileEmptyReference, TripoFileReference, TripoImageToModelRequest, + TripoImportModelRequest, TripoModelVersion, TripoMultiviewToModelRequest, TripoOrientation, @@ -21,6 +22,7 @@ from 
comfy_api_nodes.apis.tripo import ( TripoTaskType, TripoTextToModelRequest, TripoTextureModelRequest, + TripoTexturePrompt, TripoUrlReference, ) from comfy_api_nodes.util import ( @@ -28,6 +30,7 @@ from comfy_api_nodes.util import ( download_url_to_file_3d, poll_op, sync_op, + upload_3d_model_to_comfyapi, upload_images_to_comfyapi, ) @@ -538,6 +541,14 @@ class TripoTextureNode(IO.ComfyNode): optional=True, advanced=True, ), + IO.String.Input( + "texture_prompt", + default="", + multiline=True, + optional=True, + tooltip="Optional text guidance for texturing. Required in practice for imported " + "models (Tripo: Import Model), which carry no source image to infer colors from.", + ), ], outputs=[ IO.String.Output(display_name="model_file"), # for backward compatibility only @@ -571,6 +582,7 @@ class TripoTextureNode(IO.ComfyNode): texture_seed: int | None = None, texture_quality: str | None = None, texture_alignment: str | None = None, + texture_prompt: str = "", ) -> IO.NodeOutput: response = await sync_op( cls, @@ -583,6 +595,7 @@ class TripoTextureNode(IO.ComfyNode): texture_seed=texture_seed, texture_quality=texture_quality, texture_alignment=texture_alignment, + texture_prompt=TripoTexturePrompt(text=texture_prompt.strip()) if texture_prompt.strip() else None, ), ) return await poll_until_finished(cls, response, average_duration=80) @@ -915,6 +928,90 @@ class TripoConversionNode(IO.ComfyNode): return await poll_until_finished(cls, response, average_duration=30) +class TripoImportModelNode(IO.ComfyNode): + """Imports an external 3D model into Tripo, producing a MODEL_TASK_ID for post-processing nodes.""" + + SUPPORTED_FORMATS = ("glb", "fbx", "obj", "stl") + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="TripoImportModelNode", + display_name="Tripo: Import Model", + category="partner/3d/Tripo", + description="Import an external 3D model (e.g. 
from Rodin, Hunyuan3D or a local file) into Tripo " + "to use it with Tripo's post-processing nodes: Texture, Rig, Convert. " + "GLB is recommended: textures survive import only when embedded in the file. " + "Note that texturing an imported model requires a texture prompt.", + inputs=[ + IO.MultiType.Input( + "model_3d", + types=[IO.File3DGLB, IO.File3DFBX, IO.File3DOBJ, IO.File3DSTL, IO.File3DAny], + tooltip="3D model to import (GLB / FBX / OBJ / STL, up to 150 MB). " + "OBJ and STL files carry no embedded textures.", + ), + ], + outputs=[ + IO.Custom("MODEL_TASK_ID").Output(display_name="model task_id"), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"text","text":"Free"}""", + ), + ) + + @classmethod + async def execute(cls, model_3d: Types.File3D) -> IO.NodeOutput: + file_format = (model_3d.format or "").lstrip(".").lower() + if file_format == "gltf": + raise ValueError( + "GLTF (.gltf) references external files and cannot be imported. Export a single-file GLB instead." + ) + if file_format not in cls.SUPPORTED_FORMATS: + raise ValueError( + f"Unsupported 3D format '{file_format or 'unknown'}'. " + f"Tripo import supports: {', '.join(f.upper() for f in cls.SUPPORTED_FORMATS)}." 
+ ) + size = len(model_3d.get_bytes()) + if size > 150 * 1024 * 1024: + raise ValueError(f"Model file is {size / (1024 * 1024):.1f} MB; Tripo import allows up to 150 MB.") + + url = await upload_3d_model_to_comfyapi(cls, model_3d, file_format) + response = await sync_op( + cls, + endpoint=ApiEndpoint(path="/proxy/tripo/v2/openapi/import", method="POST"), + response_model=TripoTaskResponse, + data=TripoImportModelRequest(url=url, format=file_format), + ) + if response.code != 0: + raise RuntimeError(f"Failed to import model: {response.error}") + + task_id = response.data.task_id + response_poll = await poll_op( + cls, + poll_endpoint=ApiEndpoint(path=f"/proxy/tripo/v2/openapi/task/{task_id}"), + response_model=TripoTaskResponse, + failed_statuses=[ + TripoTaskStatus.FAILED, + TripoTaskStatus.CANCELLED, + TripoTaskStatus.UNKNOWN, + TripoTaskStatus.BANNED, + TripoTaskStatus.EXPIRED, + ], + status_extractor=lambda x: x.data.status, + progress_extractor=lambda x: x.data.progress, + estimated_duration=10, + ) + if response_poll.data.status != TripoTaskStatus.SUCCESS: + raise RuntimeError(f"Failed to import model: {response_poll}") + return IO.NodeOutput(task_id) + + def _p1_price_expr(*, geometry_credits: int, textured_credits: int, detailed_credits: int) -> str: return ( "(" @@ -1292,6 +1389,7 @@ class TripoExtension(ComfyExtension): TripoP1TextToModelNode, TripoP1ImageToModelNode, TripoP1MultiviewToModelNode, + TripoImportModelNode, TripoTextureNode, TripoRefineNode, TripoRigNode, From e1b9366898a4657bceea8737d74139406e4ea521 Mon Sep 17 00:00:00 2001 From: "Dr.Lt.Data" <128333288+ltdrdata@users.noreply.github.com> Date: Mon, 15 Jun 2026 03:42:03 +0900 Subject: [PATCH 25/52] bump manager version to 4.2.2 (#14471) --- manager_requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manager_requirements.txt b/manager_requirements.txt index a079d3492..13786bb35 100644 --- a/manager_requirements.txt +++ b/manager_requirements.txt @@ -1 +1 @@ 
-comfyui_manager==4.2.1 +comfyui_manager==4.2.2 From 4388eb781ab35c612c14d86d346b4663640c30e6 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Sun, 14 Jun 2026 18:47:22 -0700 Subject: [PATCH 26/52] This is already auto enabled by default. (#14476) --- README.md | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/README.md b/README.md index 786a14166..61036bd81 100644 --- a/README.md +++ b/README.md @@ -382,11 +382,7 @@ For AMD 7600 and maybe other RDNA3 cards: ```HSA_OVERRIDE_GFX_VERSION=11.0.0 pyt ### AMD ROCm Tips -You can enable experimental memory efficient attention on recent pytorch in ComfyUI on some AMD GPUs using this command, it should already be enabled by default on RDNA3. If this improves speed for you on latest pytorch on your GPU please report it so that I can enable it by default. - -```TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL=1 python main.py --use-pytorch-cross-attention``` - -You can also try setting this env variable `PYTORCH_TUNABLEOP_ENABLED=1` which might speed things up at the cost of a very slow initial run. +You can try setting this env variable `PYTORCH_TUNABLEOP_ENABLED=1` which might speed things up at the cost of a very slow initial run. 
# Notes From 7d4194d984abbfcd49ec93a615b95327c031ac69 Mon Sep 17 00:00:00 2001 From: "Daxiong (Lin)" Date: Mon, 15 Jun 2026 16:35:36 +0800 Subject: [PATCH 27/52] chore: update embedded docs to v0.5.4 (#14478) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index a49d968af..993baf975 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ comfyui-frontend-package==1.45.15 comfyui-workflow-templates==0.9.98 -comfyui-embedded-docs==0.5.3 +comfyui-embedded-docs==0.5.4 torch torchsde torchvision From ec4dec93d254000cf0f3dd9dc53768bcff794ee0 Mon Sep 17 00:00:00 2001 From: rattus <46076784+rattus128@users.noreply.github.com> Date: Tue, 16 Jun 2026 00:54:36 +1000 Subject: [PATCH 28/52] Comfy Aimdo 0.4.10 + Dynamic --reserve-vram + --vram-headroom (#14480) * main: implement --vram-headroom Implement --vram-headroom for dynamic vram as a hybrid debug/diagnostic option that can be used for people who still report shared VRAM spills. They can trial and error the setting to maintain a bit more headroom to avoid shared VRAM spills. * main: implement --reserve-vram Implement --reserve-vram as extra headroom on the simple method which is semantically as close as possible to the stated functionality and former behaviour of non-dynamic VRAM. --- comfy/cli_args.py | 1 + main.py | 4 ++-- requirements.txt | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/comfy/cli_args.py b/comfy/cli_args.py index e7ee0d5eb..e3099a230 100644 --- a/comfy/cli_args.py +++ b/comfy/cli_args.py @@ -145,6 +145,7 @@ vram_group.add_argument("--novram", action="store_true", help="When lowvram isn' vram_group.add_argument("--cpu", action="store_true", help="To use the CPU for everything (slow).") parser.add_argument("--reserve-vram", type=float, default=None, help="Set the amount of vram in GB you want to reserve for use by your OS/other software. 
By default some amount is reserved depending on your OS.") +parser.add_argument("--vram-headroom", type=float, default=0, help="Set the amount of vram in GB for DynamicVRAM to maintain as extra headroom above default. ComfyUI will try and keep this much VRAM completely free and unused, even counting VRAM from other apps.") parser.add_argument("--async-offload", nargs='?', const=2, type=int, default=None, metavar="NUM_STREAMS", help="Use async weight offloading. An optional argument controls the amount of offload streams. Default is 2. Enabled by default on Nvidia.") parser.add_argument("--disable-async-offload", action="store_true", help="Disable async weight offloading.") diff --git a/main.py b/main.py index 0ad660376..2cdb9caad 100644 --- a/main.py +++ b/main.py @@ -55,7 +55,7 @@ if __name__ == "__main__" and args.debug_hang: import comfy_aimdo.control if enables_dynamic_vram(): - comfy_aimdo.control.init() + comfy_aimdo.control.init(simple_vram_headroom=None if args.reserve_vram is None else int(args.reserve_vram * 1024 ** 3)) if os.name == "nt": os.environ['MIMALLOC_PURGE_DELAY'] = '0' @@ -231,7 +231,7 @@ import comfy.model_patcher if args.enable_dynamic_vram or (enables_dynamic_vram() and comfy.model_management.is_nvidia() and not comfy.model_management.is_wsl()): if (not args.enable_dynamic_vram) and (comfy.model_management.torch_version_numeric < (2, 8)): logging.warning("Unsupported Pytorch detected. DynamicVRAM support requires Pytorch version 2.8 or later. Falling back to legacy ModelPatcher. 
VRAM estimates may be unreliable especially on Windows") - elif comfy_aimdo.control.init_devices(d.index for d in comfy.model_management.get_all_torch_devices()): + elif comfy_aimdo.control.init_devices((d.index, int(args.vram_headroom * 1024 ** 3)) for d in comfy.model_management.get_all_torch_devices()): if args.verbose == 'DEBUG': comfy_aimdo.control.set_log_debug() elif args.verbose == 'CRITICAL': diff --git a/requirements.txt b/requirements.txt index 993baf975..f7c7da654 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,7 +23,7 @@ SQLAlchemy>=2.0.0 filelock av>=16.0.0 comfy-kitchen==0.2.10 -comfy-aimdo==0.4.9 +comfy-aimdo==0.4.10 requests simpleeval>=1.0.0 blake3 From 83a3f03218cbc055e74ac0bd8e96b7a7f4c4f3e4 Mon Sep 17 00:00:00 2001 From: "Daxiong (Lin)" Date: Mon, 15 Jun 2026 23:06:15 +0800 Subject: [PATCH 29/52] chore: update workflow templates to v0.10.0 (#14482) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index f7c7da654..392709e64 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ comfyui-frontend-package==1.45.15 -comfyui-workflow-templates==0.9.98 +comfyui-workflow-templates==0.10.0 comfyui-embedded-docs==0.5.4 torch torchsde From 2f4c4e983c63dc60ae781bcca01e0e17f4f404d6 Mon Sep 17 00:00:00 2001 From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com> Date: Mon, 15 Jun 2026 19:20:01 +0300 Subject: [PATCH 30/52] [Partner Nodes] fix(SoniloTextToMusic): always require "duration" to be specified (#14484) --- comfy_api_nodes/nodes_sonilo.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/comfy_api_nodes/nodes_sonilo.py b/comfy_api_nodes/nodes_sonilo.py index 24a9a0b06..d146f63ea 100644 --- a/comfy_api_nodes/nodes_sonilo.py +++ b/comfy_api_nodes/nodes_sonilo.py @@ -111,11 +111,10 @@ class SoniloTextToMusic(IO.ComfyNode): ), IO.Int.Input( "duration", - default=0, - min=0, + default=30, + min=1, max=360, - 
tooltip="Target duration in seconds. Set to 0 to let the model " - "infer the duration from the prompt. Maximum: 6 minutes.", + tooltip="Target duration in seconds. Maximum: 6 minutes.", ), IO.Int.Input( "seed", @@ -150,14 +149,13 @@ class SoniloTextToMusic(IO.ComfyNode): async def execute( cls, prompt: str, - duration: int = 0, + duration: int = 1, seed: int = 0, ) -> IO.NodeOutput: - validate_string(prompt, strip_whitespace=True, min_length=1) + validate_string(prompt, strip_whitespace=True, min_length=1, max_length=1000) form = aiohttp.FormData() form.add_field("prompt", prompt) - if duration > 0: - form.add_field("duration", str(duration)) + form.add_field("duration", str(duration)) audio_bytes = await _stream_sonilo_music( cls, ApiEndpoint(path="/proxy/sonilo/t2m/generate", method="POST"), From b13ca1ce7b10565eb162e38fcd433865499d5d38 Mon Sep 17 00:00:00 2001 From: rattus <46076784+rattus128@users.noreply.github.com> Date: Tue, 16 Jun 2026 13:22:24 +1000 Subject: [PATCH 31/52] main: support fallback to aimdo 0.4.9 (#14489) The aimdo 0.4.10 protocol causes startup to fail too early, before the aimdo version warning can be shown. This causes user confusion. Limp on with 0.4.9 as it will work and users will see the version warning. --- main.py | 45 ++++++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/main.py b/main.py index 2cdb9caad..82f2bab64 100644 --- a/main.py +++ b/main.py @@ -55,7 +55,11 @@ if __name__ == "__main__" and args.debug_hang: import comfy_aimdo.control if enables_dynamic_vram(): - comfy_aimdo.control.init(simple_vram_headroom=None if args.reserve_vram is None else int(args.reserve_vram * 1024 ** 3)) + try: + comfy_aimdo.control.init(simple_vram_headroom=None if args.reserve_vram is None else int(args.reserve_vram * 1024 ** 3)) + except TypeError: + # comfy-aimdo 0.4.9 protocol. 
+ comfy_aimdo.control.init() if os.name == "nt": os.environ['MIMALLOC_PURGE_DELAY'] = '0' @@ -231,23 +235,30 @@ import comfy.model_patcher if args.enable_dynamic_vram or (enables_dynamic_vram() and comfy.model_management.is_nvidia() and not comfy.model_management.is_wsl()): if (not args.enable_dynamic_vram) and (comfy.model_management.torch_version_numeric < (2, 8)): logging.warning("Unsupported Pytorch detected. DynamicVRAM support requires Pytorch version 2.8 or later. Falling back to legacy ModelPatcher. VRAM estimates may be unreliable especially on Windows") - elif comfy_aimdo.control.init_devices((d.index, int(args.vram_headroom * 1024 ** 3)) for d in comfy.model_management.get_all_torch_devices()): - if args.verbose == 'DEBUG': - comfy_aimdo.control.set_log_debug() - elif args.verbose == 'CRITICAL': - comfy_aimdo.control.set_log_critical() - elif args.verbose == 'ERROR': - comfy_aimdo.control.set_log_error() - elif args.verbose == 'WARNING': - comfy_aimdo.control.set_log_warning() - else: #INFO - comfy_aimdo.control.set_log_info() - - comfy.model_patcher.CoreModelPatcher = comfy.model_patcher.ModelPatcherDynamic - comfy.memory_management.aimdo_enabled = True - logging.info("DynamicVRAM support detected and enabled") else: - logging.warning("No working comfy-aimdo install detected. DynamicVRAM support disabled. Falling back to legacy ModelPatcher. VRAM estimates may be unreliable especially on Windows") + try: + aimdo_initialized = comfy_aimdo.control.init_devices((d.index, int(args.vram_headroom * 1024 ** 3)) for d in comfy.model_management.get_all_torch_devices()) + except TypeError: + # comfy-aimdo 0.4.9 protocol. 
+ aimdo_initialized = comfy_aimdo.control.init_devices(d.index for d in comfy.model_management.get_all_torch_devices()) + + if aimdo_initialized: + if args.verbose == 'DEBUG': + comfy_aimdo.control.set_log_debug() + elif args.verbose == 'CRITICAL': + comfy_aimdo.control.set_log_critical() + elif args.verbose == 'ERROR': + comfy_aimdo.control.set_log_error() + elif args.verbose == 'WARNING': + comfy_aimdo.control.set_log_warning() + else: #INFO + comfy_aimdo.control.set_log_info() + + comfy.model_patcher.CoreModelPatcher = comfy.model_patcher.ModelPatcherDynamic + comfy.memory_management.aimdo_enabled = True + logging.info("DynamicVRAM support detected and enabled") + else: + logging.warning("No working comfy-aimdo install detected. DynamicVRAM support disabled. Falling back to legacy ModelPatcher. VRAM estimates may be unreliable especially on Windows") def cuda_malloc_warning(): From 5db51b76b402ba9064af68618aacf81f74c7ca26 Mon Sep 17 00:00:00 2001 From: John Pollock Date: Mon, 15 Jun 2026 22:23:09 -0500 Subject: [PATCH 32/52] Fix odd-height crash and edge bleed in unaligned-width image/video decode (#14491) a1d95f3f padded the decode width to the next multiple of 32 with the pad filter to fix libswscale's float YUV->GBR edge corruption, but kept the pad target height equal to the source height. The pad filter requires the target height to be a multiple of the input's vertical chroma subsampling factor, so a chroma-subsampled input such as yuv420p (the format the gbrpf32le float branch decodes) with an odd height makes the filter round the target below the input height and fail to configure: 'Padded dimensions cannot be smaller than input dimensions' (Errno 22). This is reachable from LoadImage, which routes static images through VideoFromFile, on a lossy WebP whose width is not a multiple of 32 and whose height is odd. 
The pad filter also fills the added border with black, and chroma upsampling bleeds that black into the cropped edge of every unaligned-width subsampled decode. Pad both axes to the next multiple of 32 (32 is a multiple of every vertical subsampling factor, including yuv410p's 4 that a plain even rounding misses) and run fillborders mode=smear to replicate the real edge into the padding so it never bleeds into the cropped output, then crop both axes back to the source size. Aligned-width and uint8 paths run the identical to_ndarray call as before and are byte-identical to master; only unaligned-width subsampled inputs change, from a crash or edge artifact to a clean, deterministic decode. --- comfy_api/latest/_input_impl/video_types.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/comfy_api/latest/_input_impl/video_types.py b/comfy_api/latest/_input_impl/video_types.py index 92a1298c0..6c69256ab 100644 --- a/comfy_api/latest/_input_impl/video_types.py +++ b/comfy_api/latest/_input_impl/video_types.py @@ -325,21 +325,25 @@ class VideoFromFile(VideoInput): checked_alpha = True # Fix non-deterministic video decode when the video width is not a multiple of 32 - # For non-yuvj pixel formats (all H.264/H.265 video) + # For non-yuvj pixel formats: most H.264/H.265 video and static images (e.g. 
lossy WebP via LoadImage) + # Pad both axes to a multiple of 32 and smear the border so the alignment padding never bleeds into the cropped edges if image_format in ('gbrpf32le', 'gbrapf32le') and frame.width % 32 != 0: if align_graph is None: pad_w = ((frame.width + 31) // 32) * 32 + pad_h = ((frame.height + 31) // 32) * 32 g = av.filter.Graph() g_src = g.add_buffer(width=frame.width, height=frame.height, format=frame.format.name, time_base=video_stream.time_base) - g_pad = g.add('pad', f'{pad_w}:{frame.height}:0:0') + g_pad = g.add('pad', f'{pad_w}:{pad_h}:0:0') + g_fill = g.add('fillborders', f'left=0:right={pad_w - frame.width}:top=0:bottom={pad_h - frame.height}:mode=smear') g_sink = g.add('buffersink') g_src.link_to(g_pad) - g_pad.link_to(g_sink) + g_pad.link_to(g_fill) + g_fill.link_to(g_sink) g.configure() align_graph = (g, g_src, g_sink) align_graph[1].push(frame) - img = np.ascontiguousarray(align_graph[2].pull().to_ndarray(format=image_format)[:, :frame.width]) + img = np.ascontiguousarray(align_graph[2].pull().to_ndarray(format=image_format)[:frame.height, :frame.width]) else: img = frame.to_ndarray(format=image_format) if frame.rotation != 0: From a439dcae07d683a8d52b01b830a6da68953d2969 Mon Sep 17 00:00:00 2001 From: Alexis Rolland Date: Tue, 16 Jun 2026 11:42:00 +0800 Subject: [PATCH 33/52] Update nodes titles (#14417) --- comfy_extras/nodes_rtdetr.py | 2 +- comfy_extras/nodes_sam3.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/comfy_extras/nodes_rtdetr.py b/comfy_extras/nodes_rtdetr.py index e5a9b3902..653f3af2f 100644 --- a/comfy_extras/nodes_rtdetr.py +++ b/comfy_extras/nodes_rtdetr.py @@ -14,7 +14,7 @@ class RTDETR_detect(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="RTDETR_detect", - display_name="RT-DETR Detect", + display_name="Run Real-Time Detection (RT-DETR)", category="image/detection", search_aliases=["bbox", "bounding box", "object detection", "coco"], inputs=[ diff --git 
a/comfy_extras/nodes_sam3.py b/comfy_extras/nodes_sam3.py index daac52f9b..f88aec925 100644 --- a/comfy_extras/nodes_sam3.py +++ b/comfy_extras/nodes_sam3.py @@ -264,7 +264,7 @@ class SAM3_VideoTrack(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="SAM3_VideoTrack", - display_name="SAM3 Video Track", + display_name="Run SAM3 Video Track", category="image/detection", search_aliases=["sam3", "video", "track", "propagate"], inputs=[ From 135abed8da169e33ab0b86550e05e3ae55d6df8c Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 15 Jun 2026 23:27:33 -0400 Subject: [PATCH 34/52] ComfyUI v0.25.0 --- comfyui_version.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/comfyui_version.py b/comfyui_version.py index 4e3c924e6..cee317f3d 100644 --- a/comfyui_version.py +++ b/comfyui_version.py @@ -1,3 +1,3 @@ # This file is automatically generated by the build process when version is # updated in pyproject.toml. -__version__ = "0.24.0" +__version__ = "0.25.0" diff --git a/pyproject.toml b/pyproject.toml index 4107b4911..54f11d7fa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "ComfyUI" -version = "0.24.0" +version = "0.25.0" readme = "README.md" license = { file = "LICENSE" } requires-python = ">=3.10" From 86f987ca7c887d0a37daaf341a57c6a0473ddab0 Mon Sep 17 00:00:00 2001 From: Comfy Org PR Bot Date: Tue, 16 Jun 2026 13:24:41 +0900 Subject: [PATCH 35/52] chore(openapi): sync shared API contract from cloud@00ef9cc (#14423) --- openapi.yaml | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/openapi.yaml b/openapi.yaml index 6e203b1cd..82ff5b003 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -896,11 +896,6 @@ components: additionalProperties: true description: The workflow graph to execute type: object - prompt_id: - description: Optional client-supplied job id. 
Must be a UUID in canonical lowercase hyphenated form; it is echoed back in the response. Omitted or null means the server generates one. - format: uuid - nullable: true - type: string workflow_id: description: UUID identifying the cloud workflow entity to associate with this job type: string @@ -1800,7 +1795,9 @@ paths: application/json: schema: $ref: '#/components/schemas/ErrorResponse' - description: Invalid request (no fields provided) + description: | + Invalid request — no fields provided, or `preview_id` is the zero UUID + (`INVALID_PREVIEW_ID`). "401": content: application/json: @@ -1812,7 +1809,10 @@ paths: application/json: schema: $ref: '#/components/schemas/ErrorResponse' - description: Asset not found + description: | + Asset not found — returned both when the asset being updated does + not exist and when `preview_id` does not reference an asset + accessible to the caller. "500": content: application/json: @@ -3050,6 +3050,12 @@ paths: schema: $ref: '#/components/schemas/PromptErrorResponse' description: Payment required - Insufficient credits + "413": + content: + application/json: + schema: + $ref: '#/components/schemas/PromptErrorResponse' + description: Workflow JSON too large "429": content: application/json: From b732aa192f83f926b4ce7e54e6a6224d8e312ce4 Mon Sep 17 00:00:00 2001 From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com> Date: Tue, 16 Jun 2026 10:12:39 +0300 Subject: [PATCH 36/52] [Partner Nodes] chore(SoniloTextToMusic): reduce price by half (#14500) Signed-off-by: bigcat88 --- comfy_api_nodes/nodes_sonilo.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/comfy_api_nodes/nodes_sonilo.py b/comfy_api_nodes/nodes_sonilo.py index d146f63ea..2ad35531a 100644 --- a/comfy_api_nodes/nodes_sonilo.py +++ b/comfy_api_nodes/nodes_sonilo.py @@ -100,8 +100,7 @@ class SoniloTextToMusic(IO.ComfyNode): node_id="SoniloTextToMusic", display_name="Sonilo Text to Music", category="partner/audio/Sonilo", - 
description="Generate music from a text prompt using Sonilo's AI model. " - "Leave duration at 0 to let the model infer it from the prompt.", + description="Generate music from a text prompt using Sonilo's AI model.", inputs=[ IO.String.Input( "prompt", @@ -135,13 +134,7 @@ class SoniloTextToMusic(IO.ComfyNode): is_api_node=True, price_badge=IO.PriceBadge( depends_on=IO.PriceBadgeDepends(widgets=["duration"]), - expr=""" - ( - widgets.duration > 0 - ? {"type":"usd","usd": 0.005 * widgets.duration} - : {"type":"usd","usd": 0.005, "format":{"suffix":"/second"}} - ) - """, + expr='{"type":"usd","usd": 0.0025 * widgets.duration}', ), ) From d38ea29d6244582404533a8e52cbb069c3e251f6 Mon Sep 17 00:00:00 2001 From: Maksim Date: Tue, 16 Jun 2026 11:21:04 +0300 Subject: [PATCH 37/52] Add the checkbox to disable head drawing in node SDPoseDrawKeypoints (#14446) --- comfy_extras/nodes_sdpose.py | 41 ++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/comfy_extras/nodes_sdpose.py b/comfy_extras/nodes_sdpose.py index 20d459b00..d1cbff2a6 100644 --- a/comfy_extras/nodes_sdpose.py +++ b/comfy_extras/nodes_sdpose.py @@ -96,8 +96,12 @@ class KeypointDraw: # Body connections - matching DWPose limbSeq (1-indexed, converted to 0-indexed) self.body_limbSeq = [ [2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], - [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17], - [1, 16], [16, 18] + [10, 11], [2, 12], [12, 13], [13, 14] + ] + + # Head connections (1-indexed, converted to 0-indexed) + self.head_edges = [ + [2, 1], [1, 15], [15, 17], [1, 16], [16, 18] ] # Colors matching DWPose @@ -215,7 +219,7 @@ class KeypointDraw: return unique_pts if len(unique_pts) > 1 else [[center[0], center[1]], [center[0], center[1]]] def draw_wholebody_keypoints(self, canvas, keypoints, scores=None, threshold=0.3, - draw_body=True, draw_feet=True, draw_face=True, draw_hands=True, stick_width=4, face_point_size=3): + draw_body=True, 
draw_head=True, draw_feet=True, draw_face=True, draw_hands=True, stick_width=4, face_point_size=3): """ Draw wholebody keypoints (134 keypoints after processing) in DWPose style. @@ -237,9 +241,17 @@ class KeypointDraw: """ H, W, C = canvas.shape - # Draw body limbs - if draw_body and len(keypoints) >= 18: - for i, limb in enumerate(self.body_limbSeq): + # Draw body limbs & head connections + if (draw_body or draw_head) and len(keypoints) >= 18: + colorIndexOffset = 0 + edges = [] + if draw_body: + edges += self.body_limbSeq + else: + colorIndexOffset += len(self.body_limbSeq) + if draw_head: + edges += self.head_edges + for i, limb in enumerate(edges): # Convert from 1-indexed to 0-indexed idx1, idx2 = limb[0] - 1, limb[1] - 1 @@ -262,11 +274,17 @@ class KeypointDraw: polygon = self.draw.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stick_width), int(angle), 0, 360, 1) - self.draw.fillConvexPoly(canvas, polygon, self.colors[i % len(self.colors)]) + self.draw.fillConvexPoly(canvas, polygon, self.colors[(i + colorIndexOffset) % len(self.colors)]) - # Draw body keypoints - if draw_body and len(keypoints) >= 18: + # Draw body & head keypoints + if (draw_body or draw_head) and len(keypoints) >= 18: + head_keypoints = {0, 14, 15, 16, 17} # nose, eyes, ears + neck_point = 1 for i in range(18): + if not draw_head and i in head_keypoints: + continue + if not draw_body and i not in head_keypoints and i != neck_point: + continue if scores is not None and scores[i] < threshold: continue x, y = int(keypoints[i][0]), int(keypoints[i][1]) @@ -365,6 +383,7 @@ class SDPoseDrawKeypoints(io.ComfyNode): io.Int.Input("stick_width", default=4, min=1, max=10, step=1), io.Int.Input("face_point_size", default=3, min=1, max=10, step=1), io.Float.Input("score_threshold", default=0.3, min=0.0, max=1.0, step=0.01), + io.Boolean.Input("draw_head", default=True), ], outputs=[ io.Image.Output(), @@ -372,7 +391,7 @@ class SDPoseDrawKeypoints(io.ComfyNode): ) @classmethod - def execute(cls, 
keypoints, draw_body, draw_hands, draw_face, draw_feet, stick_width, face_point_size, score_threshold) -> io.NodeOutput: + def execute(cls, keypoints, draw_body, draw_hands, draw_face, draw_feet, stick_width, face_point_size, score_threshold, draw_head) -> io.NodeOutput: if not keypoints: return io.NodeOutput(torch.zeros((1, 64, 64, 3), dtype=torch.float32)) height = keypoints[0]["canvas_height"] @@ -405,7 +424,7 @@ class SDPoseDrawKeypoints(io.ComfyNode): canvas = drawer.draw_wholebody_keypoints( canvas, kp, sc, threshold=score_threshold, - draw_body=draw_body, draw_feet=draw_feet, + draw_body=draw_body, draw_head=draw_head, draw_feet=draw_feet, draw_face=draw_face, draw_hands=draw_hands, stick_width=stick_width, face_point_size=face_point_size, ) From 90eeeb21390b746da522bb2882af6090d869ba2a Mon Sep 17 00:00:00 2001 From: Octopus Date: Tue, 16 Jun 2026 19:21:36 +0800 Subject: [PATCH 38/52] fix: log base directory to startup messages when --base-directory is used (fixes #13363) (#13370) --- main.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/main.py b/main.py index 82f2bab64..ad5c11e16 100644 --- a/main.py +++ b/main.py @@ -127,6 +127,10 @@ def apply_custom_paths(): for config_path in itertools.chain(*args.extra_model_paths_config): utils.extra_config.load_extra_path_config(config_path) + # --base-directory + if args.base_directory: + logging.info(f"Setting base directory to: {folder_paths.base_path}") + # --output-directory, --input-directory, --user-directory if args.output_directory: output_dir = os.path.abspath(args.output_directory) From fc964047e7f6e837eca776e7c34706c04690ecfd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Sepp=C3=A4nen?= <40791699+kijai@users.noreply.github.com> Date: Wed, 17 Jun 2026 03:12:44 +0300 Subject: [PATCH 39/52] feat: Support text generation with Qwen3-VL (CORE-276) (#14298) --- comfy/sd.py | 15 +++ comfy/text_encoders/ideogram4.py | 41 +++++++ comfy/text_encoders/llama.py | 35 +++++- comfy/text_encoders/qwen35.py | 35 
++---- comfy/text_encoders/qwen3vl.py | 193 +++++++++++++++++++++++++++++++ comfy/text_encoders/qwen_vl.py | 26 +++++ comfy_extras/nodes_textgen.py | 2 +- 7 files changed, 317 insertions(+), 30 deletions(-) create mode 100644 comfy/text_encoders/qwen3vl.py diff --git a/comfy/sd.py b/comfy/sd.py index a66ba1bfb..688e6db90 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -67,6 +67,7 @@ import comfy.text_encoders.anima import comfy.text_encoders.ace15 import comfy.text_encoders.longcat_image import comfy.text_encoders.qwen35 +import comfy.text_encoders.qwen3vl import comfy.text_encoders.ernie import comfy.text_encoders.gemma4 import comfy.text_encoders.cogvideo @@ -1353,6 +1354,8 @@ class TEModel(Enum): GEMMA_4_31B = 31 T5_GEMMA = 32 GPT_OSS_20B = 33 + QWEN3VL_4B = 34 + QWEN3VL_8B = 35 def detect_te_model(sd): @@ -1414,6 +1417,8 @@ def detect_te_model(sd): if weight.shape[0] == 5120: return TEModel.QWEN35_27B return TEModel.QWEN35_2B + if "model.visual.deepstack_merger_list.0.norm.weight" in sd: # DeepStack is unique to Qwen3-VL + return TEModel.QWEN3VL_4B if sd["model.visual.merger.linear_fc2.weight"].shape[0] == 2560 else TEModel.QWEN3VL_8B if "model.layers.0.post_attention_layernorm.weight" in sd: weight = sd['model.layers.0.post_attention_layernorm.weight'] if 'model.layers.0.self_attn.q_norm.weight' in sd: @@ -1612,6 +1617,16 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip qwen35_type = {TEModel.QWEN35_08B: "qwen35_08b", TEModel.QWEN35_2B: "qwen35_2b", TEModel.QWEN35_4B: "qwen35_4b", TEModel.QWEN35_9B: "qwen35_9b", TEModel.QWEN35_27B: "qwen35_27b"}[te_model] clip_target.clip = comfy.text_encoders.qwen35.te(**llama_detect(clip_data), model_type=qwen35_type) clip_target.tokenizer = comfy.text_encoders.qwen35.tokenizer(model_type=qwen35_type) + elif te_model in (TEModel.QWEN3VL_4B, TEModel.QWEN3VL_8B): + if clip_type == CLIPType.IDEOGRAM4 and te_model == TEModel.QWEN3VL_8B: # Ideogram4 reuses the full Qwen3-VL-8B (13-layer tap for 
conditioning + multimodal generate). + clip_data[0] = comfy.utils.state_dict_prefix_replace(clip_data[0], {"model.language_model.": "model.", "model.visual.": "visual.", "lm_head.": "model.lm_head."}) + clip_target.clip = comfy.text_encoders.ideogram4.te_qwen3vl(**llama_detect(clip_data)) + clip_target.tokenizer = comfy.text_encoders.ideogram4.Ideogram4Qwen3VLTokenizer + else: + clip_data[0] = comfy.utils.state_dict_prefix_replace(clip_data[0], {"model.language_model.": "model.", "model.visual.": "visual.", "lm_head.": "model.lm_head."}) + qwen3vl_type = {TEModel.QWEN3VL_4B: "qwen3vl_4b", TEModel.QWEN3VL_8B: "qwen3vl_8b"}[te_model] + clip_target.clip = comfy.text_encoders.qwen3vl.te(**llama_detect(clip_data), model_type=qwen3vl_type) + clip_target.tokenizer = comfy.text_encoders.qwen3vl.tokenizer(model_type=qwen3vl_type) elif te_model == TEModel.QWEN3_06B: clip_target.clip = comfy.text_encoders.anima.te(**llama_detect(clip_data)) clip_target.tokenizer = comfy.text_encoders.anima.AnimaTokenizer diff --git a/comfy/text_encoders/ideogram4.py b/comfy/text_encoders/ideogram4.py index 84243772d..151b43c53 100644 --- a/comfy/text_encoders/ideogram4.py +++ b/comfy/text_encoders/ideogram4.py @@ -9,6 +9,7 @@ import os from transformers import Qwen2Tokenizer import comfy.text_encoders.llama +import comfy.text_encoders.qwen3vl from comfy import sd1_clip # Reference taps outputs of layers (0,3,...,35); comfy captures layer inputs, offset by +1. 
@@ -77,3 +78,43 @@ def te(dtype_llama=None, llama_quantization_metadata=None): model_options["quantization_metadata"] = llama_quantization_metadata super().__init__(device=device, dtype=dtype, model_options=model_options) return Ideogram4TEModel_ + + +# Full Qwen3-VL-8B variant with vision + +class Ideogram4Qwen3VLClipModel(comfy.text_encoders.qwen3vl.Qwen3VLClipModel): + def __init__(self, device="cpu", dtype=None, attention_mask=True, model_options={}): + super().__init__(device=device, layer=IDEOGRAM4_TAP_LAYERS, layer_idx=None, dtype=dtype, + attention_mask=attention_mask, model_options=model_options, model_type="qwen3vl_8b") + + +class Ideogram4Qwen3VLTEModel(sd1_clip.SD1ClipModel): + def __init__(self, device="cpu", dtype=None, model_options={}): + super().__init__(device=device, dtype=dtype, name="qwen3vl_8b", clip_model=Ideogram4Qwen3VLClipModel, model_options=model_options) + + def encode_token_weights(self, token_weight_pairs): + out, pooled, extra = super().encode_token_weights(token_weight_pairs) + b, n, seq, h = out.shape # (B, n_taps=13, seq, 4096), ascending layer order. + out = out.permute(0, 2, 3, 1).reshape(b, seq, h * n) # (B, seq, 4096*13 = 53248). + return out, pooled, extra + + +class Ideogram4Qwen3VLTokenizer(comfy.text_encoders.qwen3vl.Qwen3VLTokenizer): + def __init__(self, embedding_directory=None, tokenizer_data={}): + super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, model_type="qwen3vl_8b") + + def tokenize_with_weights(self, text, return_word_ids=False, llama_template=None, images=[], prevent_empty_text=False, thinking=True, **kwargs): + # Ideogram 4 conditions on the no-think template; default thinking=True drops the empty think block qwen3vl adds. 
+ return super().tokenize_with_weights(text, return_word_ids=return_word_ids, llama_template=llama_template, images=images, prevent_empty_text=prevent_empty_text, thinking=thinking, **kwargs) + + +def te_qwen3vl(dtype_llama=None, llama_quantization_metadata=None): + class Ideogram4Qwen3VLTEModel_(Ideogram4Qwen3VLTEModel): + def __init__(self, device="cpu", dtype=None, model_options={}): + if dtype_llama is not None: + dtype = dtype_llama + if llama_quantization_metadata is not None: + model_options = model_options.copy() + model_options["quantization_metadata"] = llama_quantization_metadata + super().__init__(device=device, dtype=dtype, model_options=model_options) + return Ideogram4Qwen3VLTEModel_ diff --git a/comfy/text_encoders/llama.py b/comfy/text_encoders/llama.py index 5087228ca..e9f38a9a2 100644 --- a/comfy/text_encoders/llama.py +++ b/comfy/text_encoders/llama.py @@ -251,6 +251,19 @@ class Qwen3_8BConfig: lm_head: bool = True stop_tokens = [151643, 151645] +@dataclass +class Qwen3VL_8BConfig(Qwen3_8BConfig): + max_position_embeddings: int = 262144 + rope_theta: float = 5000000.0 + rope_dims = [24, 20, 20] + interleaved_mrope = True + +@dataclass +class Qwen3VL_4BConfig(Qwen3VL_8BConfig): + hidden_size: int = 2560 + intermediate_size: int = 9728 + lm_head: bool = False # 4B ties word embeddings + @dataclass class Ovis25_2BConfig: vocab_size: int = 151936 @@ -703,7 +716,8 @@ class Llama2_(nn.Module): interleaved_mrope=getattr(self.config, "interleaved_mrope", False), device=device) - def forward(self, x, attention_mask=None, embeds=None, num_tokens=None, intermediate_output=None, final_layer_norm_intermediate=True, dtype=None, position_ids=None, embeds_info=[], past_key_values=None, input_ids=None): + def forward(self, x, attention_mask=None, embeds=None, num_tokens=None, intermediate_output=None, final_layer_norm_intermediate=True, + dtype=None, position_ids=None, embeds_info=[], past_key_values=None, input_ids=None,deepstack_embeds=None, 
visual_pos_masks=None): if embeds is not None: x = embeds else: @@ -767,6 +781,10 @@ class Llama2_(nn.Module): if current_kv is not None: next_key_values.append(current_kv) + # DeepStack: add per-layer visual features into the first len() decoder layers at image positions (Qwen3-VL) + if deepstack_embeds is not None and i < len(deepstack_embeds): + x[visual_pos_masks] = x[visual_pos_masks] + deepstack_embeds[i].to(x) + if i == intermediate_output: intermediate = x.clone() @@ -860,7 +878,7 @@ class BaseGenerate: torch.empty([batch, model_config.num_key_value_heads, max_cache_len, model_config.head_dim], device=device, dtype=execution_dtype), 0)) return past_key_values - def generate(self, embeds=None, do_sample=True, max_length=256, temperature=1.0, top_k=50, top_p=0.9, min_p=0.0, repetition_penalty=1.0, seed=42, stop_tokens=None, initial_tokens=[], execution_dtype=None, min_tokens=0, presence_penalty=0.0, initial_input_ids=None): + def generate(self, embeds=None, do_sample=True, max_length=256, temperature=1.0, top_k=50, top_p=0.9, min_p=0.0, repetition_penalty=1.0, seed=42, stop_tokens=None, initial_tokens=[], execution_dtype=None, min_tokens=0, presence_penalty=0.0, initial_input_ids=None, position_ids=None, deepstack_embeds=None, visual_pos_masks=None): device = embeds.device if stop_tokens is None: @@ -884,10 +902,18 @@ class BaseGenerate: generated_token_ids = [] pbar = comfy.utils.ProgressBar(max_length) + # MRoPE: prefill uses explicit 3D position_ids, decode continues from the last position + next_pos = int(position_ids[:, -1].max()) + 1 if position_ids is not None else None + # Generation loop current_input_ids = initial_input_ids for step in tqdm(range(max_length), desc="Generating tokens"): - x, _, past_key_values = self.model.forward(None, embeds=embeds, attention_mask=None, past_key_values=past_key_values, input_ids=current_input_ids) + # DeepStack visual features are injected on the prefill only; gemma4's forward lacks these kwargs. 
+ extra = {} + if step == 0 and deepstack_embeds is not None: + extra["deepstack_embeds"] = deepstack_embeds + extra["visual_pos_masks"] = visual_pos_masks + x, _, past_key_values = self.model.forward(None, embeds=embeds, attention_mask=None, past_key_values=past_key_values, input_ids=current_input_ids, position_ids=position_ids, **extra) logits = self.logits(x)[:, -1] next_token = self.sample_token(logits, temperature, top_k, top_p, min_p, repetition_penalty, initial_tokens + generated_token_ids, generator, do_sample=do_sample, presence_penalty=presence_penalty) token_id = next_token[0].item() @@ -895,6 +921,9 @@ class BaseGenerate: embeds = self.model.embed_tokens(next_token).to(execution_dtype) current_input_ids = next_token if initial_input_ids is not None else None + if next_pos is not None: # advance MRoPE position for the next (decode) step + position_ids = torch.tensor([[next_pos]], device=device) + next_pos += 1 pbar.update(1) if token_id in stop_tokens: diff --git a/comfy/text_encoders/qwen35.py b/comfy/text_encoders/qwen35.py index 416ce9d18..71a17990f 100644 --- a/comfy/text_encoders/qwen35.py +++ b/comfy/text_encoders/qwen35.py @@ -3,7 +3,6 @@ import torch.nn as nn import torch.nn.functional as F from dataclasses import dataclass, field import os -import math import comfy.model_management from comfy.ldm.modules.attention import optimized_attention_for_device @@ -563,6 +562,8 @@ class Qwen35VisionModel(nn.Module): for _ in range(config["depth"]) ]) self.merger = Qwen35VisionPatchMerger(self.hidden_size, self.spatial_merge_size, config["out_hidden_size"], device=device, dtype=dtype, ops=ops) + self.deepstack_visual_indexes = [] # DeepStack, per-layer visual features (Qwen3-VL) + self.deepstack_merger_list = None def rot_pos_emb(self, grid_thw): merge_size = self.spatial_merge_size @@ -664,9 +665,14 @@ class Qwen35VisionModel(nn.Module): ).cumsum(dim=0, dtype=torch.int32) cu_seqlens = F.pad(cu_seqlens, (1, 0), value=0) optimized_attention = 
optimized_attention_for_device(x.device, mask=False, small_input=True) - for blk in self.blocks: + deepstack_features = [] + for layer_num, blk in enumerate(self.blocks): x = blk(x, cu_seqlens=cu_seqlens, position_embeddings=position_embeddings, optimized_attention=optimized_attention) + if self.deepstack_merger_list is not None and layer_num in self.deepstack_visual_indexes: + deepstack_features.append(self.deepstack_merger_list[self.deepstack_visual_indexes.index(layer_num)](x)) merged = self.merger(x) + if self.deepstack_merger_list is not None: + return merged, deepstack_features return merged # Model Wrapper @@ -690,30 +696,7 @@ class Qwen35(BaseLlama, BaseGenerate, torch.nn.Module): return None, None def forward(self, x, attention_mask=None, embeds=None, num_tokens=None, intermediate_output=None, final_layer_norm_intermediate=True, dtype=None, embeds_info=[], past_key_values=None): - grid = None - position_ids = None - offset = 0 - for e in embeds_info: - if e.get("type") == "image": - grid = e.get("extra", None) - start = e.get("index") - if position_ids is None: - position_ids = torch.zeros((3, embeds.shape[1]), device=embeds.device) - position_ids[:, :start] = torch.arange(0, start, device=embeds.device) - end = e.get("size") + start - len_max = int(grid.max()) // 2 - start_next = len_max + start - position_ids[:, end:] = torch.arange(start_next + offset, start_next + (embeds.shape[1] - end) + offset, device=embeds.device) - position_ids[0, start:end] = start + offset - max_d = int(grid[0][1]) // 2 - position_ids[1, start:end] = torch.arange(start + offset, start + max_d + offset, device=embeds.device).unsqueeze(1).repeat(1, math.ceil((end - start) / max_d)).flatten(0)[:end - start] - max_d = int(grid[0][2]) // 2 - position_ids[2, start:end] = torch.arange(start + offset, start + max_d + offset, device=embeds.device).unsqueeze(0).repeat(math.ceil((end - start) / max_d), 1).flatten(0)[:end - start] - offset += len_max - (end - start) - - if grid is None: - 
position_ids = None - + position_ids = comfy.text_encoders.qwen_vl.qwen2vl_mrope_position_ids(embeds_info, embeds.shape[1], embeds.device) return super().forward(x, attention_mask=attention_mask, embeds=embeds, num_tokens=num_tokens, intermediate_output=intermediate_output, final_layer_norm_intermediate=final_layer_norm_intermediate, dtype=dtype, position_ids=position_ids, past_key_values=past_key_values) def init_kv_cache(self, batch, max_cache_len, device, execution_dtype): diff --git a/comfy/text_encoders/qwen3vl.py b/comfy/text_encoders/qwen3vl.py new file mode 100644 index 000000000..59c9aae6d --- /dev/null +++ b/comfy/text_encoders/qwen3vl.py @@ -0,0 +1,193 @@ +import os + +import torch +import torch.nn as nn +import torch.nn.functional as F +from transformers import Qwen2Tokenizer + +from comfy import sd1_clip +import comfy.text_encoders.qwen_vl +from .qwen35 import Qwen35VisionModel +from .llama import BaseLlama, BaseQwen3, BaseGenerate, Llama2_, Qwen3VL_4BConfig, Qwen3VL_8BConfig + + +QWEN3VL_VISION = { + "qwen3vl_4b": dict(hidden_size=1024, intermediate_size=4096, depth=24, deepstack_visual_indexes=[5, 11, 17]), + "qwen3vl_8b": dict(hidden_size=1152, intermediate_size=4304, depth=27, deepstack_visual_indexes=[8, 16, 24]), +} +QWEN3VL_VISION_COMMON = dict(num_heads=16, patch_size=16, temporal_patch_size=2, in_channels=3, + spatial_merge_size=2, num_position_embeddings=2304) + +QWEN3VL_CONFIGS = {"qwen3vl_4b": Qwen3VL_4BConfig, "qwen3vl_8b": Qwen3VL_8BConfig} + + +class Qwen3VLDeepstackMerger(nn.Module): + # DeepStack merger: postshuffle LayerNorm (applied after spatial merge), unlike the main merger. 
+ def __init__(self, hidden_size, spatial_merge_size, out_hidden_size, device=None, dtype=None, ops=None): + super().__init__() + self.merge_dim = hidden_size * (spatial_merge_size ** 2) + self.norm = ops.LayerNorm(self.merge_dim, eps=1e-6, device=device, dtype=dtype) + self.linear_fc1 = ops.Linear(self.merge_dim, self.merge_dim, device=device, dtype=dtype) + self.linear_fc2 = ops.Linear(self.merge_dim, out_hidden_size, device=device, dtype=dtype) + + def forward(self, x): + x = self.norm(x.view(-1, self.merge_dim)) + return self.linear_fc2(F.gelu(self.linear_fc1(x))) + + +class Qwen3VLVisionModel(Qwen35VisionModel): + # Qwen3.5 vision + DeepStack + def __init__(self, config, device=None, dtype=None, ops=None): + super().__init__(config, device=device, dtype=dtype, ops=ops) + self.deepstack_visual_indexes = config["deepstack_visual_indexes"] + self.deepstack_merger_list = nn.ModuleList([ + Qwen3VLDeepstackMerger(self.hidden_size, self.spatial_merge_size, config["out_hidden_size"], device=device, dtype=dtype, ops=ops) + for _ in self.deepstack_visual_indexes + ]) + + +class Qwen3VL(BaseLlama, BaseQwen3, BaseGenerate, torch.nn.Module): + model_type = "qwen3vl_8b" + + def __init__(self, config_dict, dtype, device, operations): + super().__init__() + config = QWEN3VL_CONFIGS[self.model_type](**config_dict) + self.num_layers = config.num_hidden_layers + self.model = Llama2_(config, device=device, dtype=dtype, ops=operations) + vision_config = {**QWEN3VL_VISION_COMMON, **QWEN3VL_VISION[self.model_type], "out_hidden_size": config.hidden_size} + self.visual = Qwen3VLVisionModel(vision_config, device=device, dtype=dtype, ops=operations) + self.dtype = dtype + + def preprocess_embed(self, embed, device): + if embed["type"] == "image": + # Qwen3-VL normalizes to [-1, 1] (mean/std 0.5), unlike Qwen2.5-VL's CLIP normalization. 
+ image, grid = comfy.text_encoders.qwen_vl.process_qwen2vl_images(embed["data"], patch_size=16, image_mean=[0.5, 0.5, 0.5], image_std=[0.5, 0.5, 0.5]) + merged, deepstack = self.visual(image.to(device, dtype=torch.float32), grid) + return merged, {"grid": grid, "deepstack": deepstack} + return None, None + + def build_image_inputs(self, embeds, embeds_info): + # Returns (position_ids, visual_pos_masks, deepstack) for the prompt + images = sorted([e for e in embeds_info if e.get("type") == "image"], key=lambda e: e["index"]) + if len(images) == 0: + return None, None, None + + device = embeds.device + seq = embeds.shape[1] + position_ids = comfy.text_encoders.qwen_vl.qwen2vl_mrope_position_ids(embeds_info, seq, device) + + # DeepStack: mask of image positions + per-vision-layer features to inject there. + visual_pos_masks = torch.zeros((1, seq), dtype=torch.bool, device=device) + deepstack = None + for e in images: + start = e["index"] + end = e["size"] + start + visual_pos_masks[0, start:end] = True + ds = e["extra"]["deepstack"] + if deepstack is None: + deepstack = [d for d in ds] + else: + deepstack = [torch.cat([deepstack[i], ds[i]], dim=0) for i in range(len(ds))] + return position_ids, visual_pos_masks, deepstack + + +def _make_qwen3vl_model(model_type): + class Qwen3VL_(Qwen3VL): + pass + Qwen3VL_.model_type = model_type + return Qwen3VL_ + + +class Qwen3VLClipModel(sd1_clip.SDClipModel): + def __init__(self, device="cpu", layer="hidden", layer_idx=-1, dtype=None, attention_mask=True, model_options={}, model_type="qwen3vl_8b"): + super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, + dtype=dtype, special_tokens={"pad": 151643}, layer_norm_hidden_state=False, + model_class=_make_qwen3vl_model(model_type), enable_attention_masks=attention_mask, + return_attention_masks=attention_mask, model_options=model_options) + + def generate(self, tokens, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, 
seed, presence_penalty=0.0): + if isinstance(tokens, dict): + tokens = next(iter(tokens.values())) + tokens_only = [[t[0] for t in b] for b in tokens] + embeds, _, _, embeds_info = self.process_tokens(tokens_only, self.execution_device) + position_ids, visual_pos_masks, deepstack = self.transformer.build_image_inputs(embeds, embeds_info) + return self.transformer.generate(embeds, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed, + presence_penalty=presence_penalty, position_ids=position_ids, + visual_pos_masks=visual_pos_masks, deepstack_embeds=deepstack) + + +class Qwen3VLTEModel(sd1_clip.SD1ClipModel): + def __init__(self, device="cpu", dtype=None, model_options={}, model_type="qwen3vl_8b"): + clip_model = lambda **kw: Qwen3VLClipModel(**kw, model_type=model_type) + super().__init__(device=device, dtype=dtype, name=model_type, clip_model=clip_model, model_options=model_options) + + +class Qwen3VLSDTokenizer(sd1_clip.SDTokenizer): + def __init__(self, embedding_directory=None, tokenizer_data={}, embedding_size=4096, embedding_key="qwen3vl_8b"): + tokenizer_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "qwen25_tokenizer") + super().__init__(tokenizer_path, pad_with_end=False, embedding_directory=embedding_directory, embedding_size=embedding_size, embedding_key=embedding_key, tokenizer_class=Qwen2Tokenizer, + has_start_token=False, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=1, pad_token=151643, tokenizer_data=tokenizer_data) + + +class Qwen3VLTokenizer(sd1_clip.SD1Tokenizer): + def __init__(self, embedding_directory=None, tokenizer_data={}, model_type="qwen3vl_8b"): + embedding_size = 2560 if model_type == "qwen3vl_4b" else 4096 + tokenizer = lambda *a, **kw: Qwen3VLSDTokenizer(*a, **kw, embedding_size=embedding_size, embedding_key=model_type) + super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, name=model_type, tokenizer=tokenizer) + 
self.llama_template = "<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n" + self.llama_template_images = "<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>{}<|im_end|>\n<|im_start|>assistant\n" + + def tokenize_with_weights(self, text, return_word_ids=False, llama_template=None, images=[], prevent_empty_text=False, thinking=False, **kwargs): + image = kwargs.get("image", None) + if image is not None and len(images) == 0: + images = [image[i:i + 1] for i in range(image.shape[0])] + + skip_template = text.startswith('<|im_start|>') + if prevent_empty_text and text == '': + text = ' ' + + if skip_template: + llama_text = text + else: + if llama_template is not None: + template = llama_template + elif len(images) == 0: + template = self.llama_template + else: + template = self.llama_template_images + if len(images) > 1: + vision_block = "<|vision_start|><|image_pad|><|vision_end|>" + template = template.replace(vision_block, vision_block * len(images), 1) + llama_text = template.format(text) + if not thinking: # Qwen3 convention: empty think block suppresses reasoning + llama_text += "\n\n\n\n" + + tokens = super().tokenize_with_weights(llama_text, return_word_ids=return_word_ids, disable_weights=True, **kwargs) + key_name = next(iter(tokens)) + embed_count = 0 + for r in tokens[key_name]: + for i in range(len(r)): + if r[i][0] == 151655: # <|image_pad|> + if len(images) > embed_count: + r[i] = ({"type": "image", "data": images[embed_count], "original_type": "image"},) + r[i][1:] + embed_count += 1 + return tokens + + +def tokenizer(model_type="qwen3vl_8b"): + class Qwen3VLTokenizer_(Qwen3VLTokenizer): + def __init__(self, embedding_directory=None, tokenizer_data={}): + super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, model_type=model_type) + return Qwen3VLTokenizer_ + + +def te(dtype_llama=None, llama_quantization_metadata=None, model_type="qwen3vl_8b"): + class Qwen3VLTEModel_(Qwen3VLTEModel): + def 
__init__(self, device="cpu", dtype=None, model_options={}): + if dtype_llama is not None: + dtype = dtype_llama + if llama_quantization_metadata is not None: + model_options = model_options.copy() + model_options["quantization_metadata"] = llama_quantization_metadata + super().__init__(device=device, dtype=dtype, model_options=model_options, model_type=model_type) + return Qwen3VLTEModel_ diff --git a/comfy/text_encoders/qwen_vl.py b/comfy/text_encoders/qwen_vl.py index 98c350a12..924eb6ad8 100644 --- a/comfy/text_encoders/qwen_vl.py +++ b/comfy/text_encoders/qwen_vl.py @@ -88,6 +88,32 @@ def process_qwen2vl_images( return flatten_patches, image_grid_thw +def qwen2vl_mrope_position_ids(embeds_info, seq_len, device): + # (3, seq_len) T/H/W MRoPE position ids: text runs sequentially, each image span gets its grid positions. + # Returns None when there are no image embeds. `extra` is the image grid_thw, or a dict carrying it under "grid". + position_ids = None + offset = 0 + for e in embeds_info: + if e.get("type") == "image": + extra = e.get("extra", None) + grid = extra["grid"] if isinstance(extra, dict) else extra + start = e.get("index") + if position_ids is None: + position_ids = torch.zeros((3, seq_len), device=device) + position_ids[:, :start] = torch.arange(0, start, device=device) + end = e.get("size") + start + len_max = int(grid.max()) // 2 + start_next = len_max + start + position_ids[:, end:] = torch.arange(start_next + offset, start_next + (seq_len - end) + offset, device=device) + position_ids[0, start:end] = start + offset + max_d = int(grid[0][1]) // 2 + position_ids[1, start:end] = torch.arange(start + offset, start + max_d + offset, device=device).unsqueeze(1).repeat(1, math.ceil((end - start) / max_d)).flatten(0)[:end - start] + max_d = int(grid[0][2]) // 2 + position_ids[2, start:end] = torch.arange(start + offset, start + max_d + offset, device=device).unsqueeze(0).repeat(math.ceil((end - start) / max_d), 1).flatten(0)[:end - start] + offset += 
len_max - (end - start) + return position_ids + + class VisionPatchEmbed(nn.Module): def __init__( self, diff --git a/comfy_extras/nodes_textgen.py b/comfy_extras/nodes_textgen.py index d52faf815..5a947d5c5 100644 --- a/comfy_extras/nodes_textgen.py +++ b/comfy_extras/nodes_textgen.py @@ -35,7 +35,7 @@ class TextGenerate(io.ComfyNode): io.Image.Input("image", optional=True), io.Image.Input("video", optional=True, tooltip="Video frames as image batch. Assumed to be 24 FPS; subsampled to 1 FPS internally."), io.Audio.Input("audio", optional=True), - io.Int.Input("max_length", default=256, min=1, max=2048), + io.Int.Input("max_length", default=512, min=1, max=32768), io.DynamicCombo.Input("sampling_mode", options=sampling_options, display_name="Sampling Mode"), io.Boolean.Input("thinking", optional=True, default=False, tooltip="Operate in thinking mode if the model supports it."), io.Boolean.Input("use_default_template", optional=True, default=True, tooltip="Use the built in system prompt/template if the model has one.", advanced=True), From ca1622ca24bbdbbc19721b0577ffab98cf64eb4d Mon Sep 17 00:00:00 2001 From: Alexis Rolland Date: Wed, 17 Jun 2026 08:33:09 +0800 Subject: [PATCH 40/52] chore: Update nodes categories (CORE-263) (#14460) --- comfy_extras/nodes_ace.py | 12 ++-- comfy_extras/nodes_apg.py | 2 +- comfy_extras/nodes_ar_video.py | 4 +- comfy_extras/nodes_audio.py | 10 ++-- comfy_extras/nodes_bernini.py | 23 +++----- comfy_extras/nodes_camera_trajectory.py | 2 +- comfy_extras/nodes_chroma_radiance.py | 4 +- comfy_extras/nodes_clip_sdxl.py | 6 +- comfy_extras/nodes_context_windows.py | 1 + comfy_extras/nodes_controlnet.py | 3 + comfy_extras/nodes_cosmos.py | 6 +- comfy_extras/nodes_custom_sampler.py | 6 +- comfy_extras/nodes_easycache.py | 4 +- comfy_extras/nodes_edit_model.py | 3 +- comfy_extras/nodes_flux.py | 12 ++-- comfy_extras/nodes_hidream.py | 8 ++- comfy_extras/nodes_hidream_o1.py | 6 +- comfy_extras/nodes_hunyuan.py | 20 ++++--- 
comfy_extras/nodes_hunyuan3d.py | 8 +-- comfy_extras/nodes_ideogram4.py | 2 +- comfy_extras/nodes_kandinsky5.py | 7 ++- comfy_extras/nodes_latent.py | 2 + comfy_extras/nodes_lt.py | 22 +++---- comfy_extras/nodes_lt_audio.py | 12 ++-- comfy_extras/nodes_lt_upsampler.py | 2 +- comfy_extras/nodes_lumina2.py | 6 +- comfy_extras/nodes_mask.py | 1 + comfy_extras/nodes_mochi.py | 2 +- comfy_extras/nodes_model_advanced.py | 19 +++--- comfy_extras/nodes_model_merging.py | 22 +++---- .../nodes_model_merging_model_specific.py | 28 ++++----- comfy_extras/nodes_model_patch.py | 14 ++++- comfy_extras/nodes_pid.py | 6 +- comfy_extras/nodes_pixart.py | 3 +- comfy_extras/nodes_post_processing.py | 2 +- comfy_extras/nodes_qwen.py | 4 +- comfy_extras/nodes_scail.py | 16 +++-- comfy_extras/nodes_sd3.py | 10 ++-- comfy_extras/nodes_sdupscale.py | 2 +- comfy_extras/nodes_stable3d.py | 6 +- comfy_extras/nodes_stable_cascade.py | 6 +- comfy_extras/nodes_train.py | 2 +- comfy_extras/nodes_video_model.py | 7 ++- comfy_extras/nodes_void.py | 6 +- comfy_extras/nodes_wan.py | 33 ++++++----- comfy_extras/nodes_wandancer.py | 4 +- comfy_extras/nodes_wanmove.py | 11 ++-- comfy_extras/nodes_zimage.py | 2 +- nodes.py | 58 ++++++++++--------- 49 files changed, 240 insertions(+), 217 deletions(-) diff --git a/comfy_extras/nodes_ace.py b/comfy_extras/nodes_ace.py index 044077b18..eaf234d5b 100644 --- a/comfy_extras/nodes_ace.py +++ b/comfy_extras/nodes_ace.py @@ -11,7 +11,7 @@ class TextEncodeAceStepAudio(IO.ComfyNode): def define_schema(cls): return IO.Schema( node_id="TextEncodeAceStepAudio", - category="model/conditioning", + category="model/conditioning/ace", inputs=[ IO.Clip.Input("clip"), IO.String.Input("tags", multiline=True, dynamic_prompts=True), @@ -33,7 +33,7 @@ class TextEncodeAceStepAudio15(IO.ComfyNode): def define_schema(cls): return IO.Schema( node_id="TextEncodeAceStepAudio1.5", - category="model/conditioning", + category="model/conditioning/ace", inputs=[ IO.Clip.Input("clip"), 
IO.String.Input("tags", multiline=True, dynamic_prompts=True), @@ -67,7 +67,7 @@ class EmptyAceStepLatentAudio(IO.ComfyNode): return IO.Schema( node_id="EmptyAceStepLatentAudio", display_name="Empty Ace Step 1.0 Latent Audio", - category="model/latent/audio", + category="model/latent/ace", inputs=[ IO.Float.Input("seconds", default=120.0, min=1.0, max=1000.0, step=0.1), IO.Int.Input( @@ -90,7 +90,7 @@ class EmptyAceStep15LatentAudio(IO.ComfyNode): return IO.Schema( node_id="EmptyAceStep1.5LatentAudio", display_name="Empty Ace Step 1.5 Latent Audio", - category="model/latent/audio", + category="model/latent/ace", inputs=[ IO.Float.Input("seconds", default=120.0, min=1.0, max=1000.0, step=0.01), IO.Int.Input( @@ -111,8 +111,8 @@ class ReferenceAudio(IO.ComfyNode): def define_schema(cls): return IO.Schema( node_id="ReferenceTimbreAudio", - display_name="Reference Audio", - category="advanced/conditioning/audio", + display_name="Set Reference Audio", + category="model/conditioning", is_experimental=True, description="This node sets the reference audio for ace step 1.5", inputs=[ diff --git a/comfy_extras/nodes_apg.py b/comfy_extras/nodes_apg.py index 4a352038a..6e69b73f7 100644 --- a/comfy_extras/nodes_apg.py +++ b/comfy_extras/nodes_apg.py @@ -16,7 +16,7 @@ class APG(io.ComfyNode): return io.Schema( node_id="APG", display_name="Adaptive Projected Guidance", - category="model/sampling/custom_sampling", + category="model/sampling/custom", inputs=[ io.Model.Input("model"), io.Float.Input( diff --git a/comfy_extras/nodes_ar_video.py b/comfy_extras/nodes_ar_video.py index c22359eb2..9d8f64b20 100644 --- a/comfy_extras/nodes_ar_video.py +++ b/comfy_extras/nodes_ar_video.py @@ -19,7 +19,7 @@ class EmptyARVideoLatent(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="EmptyARVideoLatent", - category="model/latent/video", + category="model/latent/autoregressive", inputs=[ io.Int.Input("width", default=832, min=16, max=8192, step=16), io.Int.Input("height", 
default=480, min=16, max=8192, step=16), @@ -85,7 +85,7 @@ class ARVideoI2V(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="ARVideoI2V", - category="model/conditioning/video_models", + category="model/conditioning/autoregressive", inputs=[ io.Model.Input("model"), io.Vae.Input("vae"), diff --git a/comfy_extras/nodes_audio.py b/comfy_extras/nodes_audio.py index 1dc97ecd7..77f124e28 100644 --- a/comfy_extras/nodes_audio.py +++ b/comfy_extras/nodes_audio.py @@ -16,7 +16,7 @@ class EmptyLatentAudio(IO.ComfyNode): return IO.Schema( node_id="EmptyLatentAudio", display_name="Empty Latent Audio", - category="model/latent/audio", + category="model/latent", essentials_category="Audio", inputs=[ IO.Float.Input("seconds", default=47.6, min=1.0, max=1000.0, step=0.1), @@ -41,7 +41,7 @@ class ConditioningStableAudio(IO.ComfyNode): def define_schema(cls): return IO.Schema( node_id="ConditioningStableAudio", - category="model/conditioning", + category="model/conditioning/stable audio", inputs=[ IO.Conditioning.Input("positive"), IO.Conditioning.Input("negative"), @@ -70,7 +70,7 @@ class VAEEncodeAudio(IO.ComfyNode): node_id="VAEEncodeAudio", search_aliases=["audio to latent"], display_name="VAE Encode Audio", - category="model/latent/audio", + category="model/latent", inputs=[ IO.Audio.Input("audio"), IO.Vae.Input("vae"), @@ -115,7 +115,7 @@ class VAEDecodeAudio(IO.ComfyNode): node_id="VAEDecodeAudio", search_aliases=["latent to audio"], display_name="VAE Decode Audio", - category="model/latent/audio", + category="model/latent", inputs=[ IO.Latent.Input("samples"), IO.Vae.Input("vae"), @@ -137,7 +137,7 @@ class VAEDecodeAudioTiled(IO.ComfyNode): node_id="VAEDecodeAudioTiled", search_aliases=["latent to audio"], display_name="VAE Decode Audio (Tiled)", - category="model/latent/audio", + category="model/latent", inputs=[ IO.Latent.Input("samples"), IO.Vae.Input("vae"), diff --git a/comfy_extras/nodes_bernini.py b/comfy_extras/nodes_bernini.py index 
227fa5753..0537e0806 100644 --- a/comfy_extras/nodes_bernini.py +++ b/comfy_extras/nodes_bernini.py @@ -39,9 +39,9 @@ class BerniniConditioning(io.ComfyNode): return io.Schema( node_id="BerniniConditioning", display_name="Bernini Conditioning", - category="conditioning/video_models", + category="model/conditioning/bernini", description="Conditioning node for Bernini in-context video/image conditioning. It can be used for the following tasks: t2v (text-to-video), v2v (video-to-video), rv2v (reference-guided video editing), r2v (reference-to-video), ads2v (insert image/video into video)." - "Reference images injected as in-context tokens (r2v, rv2v) are encoded independently at their own native aspect ratio (long edge capped at ref_max_size)", + "Reference images injected as in-context tokens (r2v, rv2v) are encoded independently at their own native aspect ratio (long edge capped at ref_max_size)", inputs=[ io.Conditioning.Input("positive"), io.Conditioning.Input("negative"), @@ -50,14 +50,11 @@ class BerniniConditioning(io.ComfyNode): io.Int.Input("height", default=480, min=16, max=8192, step=16), io.Int.Input("length", default=81, min=1, max=8192, step=4), io.Int.Input("batch_size", default=1, min=1, max=4096), - io.Image.Input("source_video", optional=True, tooltip=( - "Source video to edit or restyle (v2v, rv2v). Resized to width/height and trimmed to length.")), - io.Image.Input("reference_video", optional=True, tooltip=( - "Video to insert into the source video (ads2v).")), + io.Image.Input("source_video", optional=True, tooltip=("Source video to edit or restyle (v2v, rv2v). 
Resized to width/height and trimmed to length.")), + io.Image.Input("reference_video", optional=True, tooltip=("Video to insert into the source video (ads2v).")), io.Autogrow.Input("reference_images", optional=True, template=io.Autogrow.TemplatePrefix( - input=io.Image.Input("reference_image", tooltip=( - "Reference image injected as an in-context token (r2v, rv2v).")), + input=io.Image.Input("reference_image", tooltip=("Reference image injected as an in-context token (r2v, rv2v).")), prefix="reference_image_", min=0, max=8)), io.Int.Input("ref_max_size", default=848, min=16, max=8192, step=16, optional=True, tooltip=( "Max size for the long edge of reference_video and reference_images. Resized with preserved aspect ratio and snapped to 16px.")), @@ -70,10 +67,8 @@ class BerniniConditioning(io.ComfyNode): ) @classmethod - def execute(cls, positive, negative, vae, width, height, length, batch_size, - source_video=None, reference_video=None, reference_images=None, ref_max_size=848) -> io.NodeOutput: - latent = torch.zeros([batch_size, 16, ((length - 1) // 4) + 1, height // 8, width // 8], - device=comfy.model_management.intermediate_device()) + def execute(cls, positive, negative, vae, width, height, length, batch_size, source_video=None, reference_video=None, reference_images=None, ref_max_size=848) -> io.NodeOutput: + latent = torch.zeros([batch_size, 16, ((length - 1) // 4) + 1, height // 8, width // 8], device=comfy.model_management.intermediate_device()) # source_video (1), reference_video (2), reference_images (3, 4, ...). 
context = [] @@ -106,9 +101,7 @@ class BerniniConditioning(io.ComfyNode): class BerniniExtension(ComfyExtension): @override async def get_node_list(self) -> list[type[io.ComfyNode]]: - return [ - BerniniConditioning, - ] + return [BerniniConditioning,] async def comfy_entrypoint() -> BerniniExtension: diff --git a/comfy_extras/nodes_camera_trajectory.py b/comfy_extras/nodes_camera_trajectory.py index 13a1448f4..280d136af 100644 --- a/comfy_extras/nodes_camera_trajectory.py +++ b/comfy_extras/nodes_camera_trajectory.py @@ -153,7 +153,7 @@ class WanCameraEmbedding(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="WanCameraEmbedding", - category="model/conditioning/video_models", + category="model/conditioning/wan/camera", inputs=[ io.Combo.Input( "camera_pose", diff --git a/comfy_extras/nodes_chroma_radiance.py b/comfy_extras/nodes_chroma_radiance.py index a4f673001..059344f3c 100644 --- a/comfy_extras/nodes_chroma_radiance.py +++ b/comfy_extras/nodes_chroma_radiance.py @@ -13,7 +13,7 @@ class EmptyChromaRadianceLatentImage(io.ComfyNode): def define_schema(cls) -> io.Schema: return io.Schema( node_id="EmptyChromaRadianceLatentImage", - category="model/latent/chroma_radiance", + category="model/latent/chroma radiance", inputs=[ io.Int.Input(id="width", default=1024, min=16, max=nodes.MAX_RESOLUTION, step=16), io.Int.Input(id="height", default=1024, min=16, max=nodes.MAX_RESOLUTION, step=16), @@ -33,7 +33,7 @@ class ChromaRadianceOptions(io.ComfyNode): def define_schema(cls) -> io.Schema: return io.Schema( node_id="ChromaRadianceOptions", - category="model/patch/chroma_radiance", + category="model/patch/chroma radiance", description="Allows setting advanced options for the Chroma Radiance model.", inputs=[ io.Model.Input(id="model"), diff --git a/comfy_extras/nodes_clip_sdxl.py b/comfy_extras/nodes_clip_sdxl.py index 7a001af6f..08fbbd827 100644 --- a/comfy_extras/nodes_clip_sdxl.py +++ b/comfy_extras/nodes_clip_sdxl.py @@ -9,7 +9,8 @@ class 
CLIPTextEncodeSDXLRefiner(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="CLIPTextEncodeSDXLRefiner", - category="advanced/conditioning", + display_name="CLIP Text Encode (SDXL Refiner)", + category="model/conditioning/stable diffusion", inputs=[ io.Float.Input("ascore", default=6.0, min=0.0, max=1000.0, step=0.01), io.Int.Input("width", default=1024, min=0, max=nodes.MAX_RESOLUTION), @@ -30,7 +31,8 @@ class CLIPTextEncodeSDXL(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="CLIPTextEncodeSDXL", - category="advanced/conditioning", + display_name="CLIP Text Encode (SDXL)", + category="model/conditioning/stable diffusion", inputs=[ io.Clip.Input("clip"), io.Int.Input("width", default=1024, min=0, max=nodes.MAX_RESOLUTION), diff --git a/comfy_extras/nodes_context_windows.py b/comfy_extras/nodes_context_windows.py index d9e32b9d9..098c26f23 100644 --- a/comfy_extras/nodes_context_windows.py +++ b/comfy_extras/nodes_context_windows.py @@ -66,6 +66,7 @@ class WanContextWindowsManualNode(ContextWindowsManualNode): schema.node_id = "WanContextWindowsManual" schema.display_name = "WAN Context Windows (Manual)" schema.description = "Manually set context windows for WAN-like models (dim=2)." 
+ schema.category="model/patch/wan" schema.inputs = [ io.Model.Input("model", tooltip="The model to apply context windows to during sampling."), io.Int.Input("context_length", min=1, max=nodes.MAX_RESOLUTION, step=4, default=81, tooltip="The length of the context window.", advanced=True), diff --git a/comfy_extras/nodes_controlnet.py b/comfy_extras/nodes_controlnet.py index 17d965405..eb476f497 100644 --- a/comfy_extras/nodes_controlnet.py +++ b/comfy_extras/nodes_controlnet.py @@ -9,6 +9,8 @@ class SetUnionControlNetType(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="SetUnionControlNetType", + search_aliases=["set controlnet type", "union controlnet type"], + display_name="Set Union ControlNet Type", category="model/conditioning/controlnet", inputs=[ io.ControlNet.Input("control_net"), @@ -39,6 +41,7 @@ class ControlNetInpaintingAliMamaApply(io.ComfyNode): return io.Schema( node_id="ControlNetInpaintingAliMamaApply", search_aliases=["masked controlnet"], + display_name="Apply ControlNet Inpainting (AliMama)", category="model/conditioning/controlnet", inputs=[ io.Conditioning.Input("positive"), diff --git a/comfy_extras/nodes_cosmos.py b/comfy_extras/nodes_cosmos.py index d754ab442..93cc67a6c 100644 --- a/comfy_extras/nodes_cosmos.py +++ b/comfy_extras/nodes_cosmos.py @@ -13,7 +13,7 @@ class EmptyCosmosLatentVideo(io.ComfyNode): def define_schema(cls) -> io.Schema: return io.Schema( node_id="EmptyCosmosLatentVideo", - category="model/latent/video", + category="model/latent/cosmos", inputs=[ io.Int.Input("width", default=1280, min=16, max=nodes.MAX_RESOLUTION, step=16), io.Int.Input("height", default=704, min=16, max=nodes.MAX_RESOLUTION, step=16), @@ -45,7 +45,7 @@ class CosmosImageToVideoLatent(io.ComfyNode): def define_schema(cls) -> io.Schema: return io.Schema( node_id="CosmosImageToVideoLatent", - category="model/conditioning/inpaint", + category="model/conditioning/cosmos", inputs=[ io.Vae.Input("vae"), io.Int.Input("width", default=1280, 
min=16, max=nodes.MAX_RESOLUTION, step=16), @@ -88,7 +88,7 @@ class CosmosPredict2ImageToVideoLatent(io.ComfyNode): def define_schema(cls) -> io.Schema: return io.Schema( node_id="CosmosPredict2ImageToVideoLatent", - category="model/conditioning/inpaint", + category="model/conditioning/cosmos", inputs=[ io.Vae.Input("vae"), io.Int.Input("width", default=848, min=16, max=nodes.MAX_RESOLUTION, step=16), diff --git a/comfy_extras/nodes_custom_sampler.py b/comfy_extras/nodes_custom_sampler.py index 3e97084a4..c9d7e06fc 100644 --- a/comfy_extras/nodes_custom_sampler.py +++ b/comfy_extras/nodes_custom_sampler.py @@ -729,7 +729,7 @@ class SamplerCustom(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="SamplerCustom", - category="model/sampling/custom_sampling", + category="model/sampling/custom", inputs=[ io.Model.Input("model"), io.Boolean.Input("add_noise", default=True, advanced=True), @@ -1015,7 +1015,7 @@ class SamplerCustomAdvanced(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="SamplerCustomAdvanced", - category="model/sampling/custom_sampling", + category="model/sampling/custom", inputs=[ io.Noise.Input("noise"), io.Guider.Input("guider"), @@ -1143,7 +1143,7 @@ class CFGOverride(io.ComfyNode): display_name="CFG Override", description="Override cfg to a fixed value over a [start, end] percent (sigma) range. 
" "With multiple overrides, the one nearest the sampler wins on overlap.", - category="sampling/custom_sampling", + category="model/sampling/guiders", inputs=[ io.Model.Input("model"), io.Float.Input("cfg", default=1.0, min=0.0, max=100.0, step=0.1, round=0.01), diff --git a/comfy_extras/nodes_easycache.py b/comfy_extras/nodes_easycache.py index 923c2bb05..9e907d371 100644 --- a/comfy_extras/nodes_easycache.py +++ b/comfy_extras/nodes_easycache.py @@ -363,7 +363,7 @@ class EasyCacheNode(io.ComfyNode): node_id="EasyCache", display_name="EasyCache", description="Native EasyCache implementation.", - category="advanced/debug/model", + category="advanced/debug", is_experimental=True, inputs=[ io.Model.Input("model", tooltip="The model to add EasyCache to."), @@ -496,7 +496,7 @@ class LazyCacheNode(io.ComfyNode): node_id="LazyCache", display_name="LazyCache", description="A homebrew version of EasyCache - even 'easier' version of EasyCache to implement. Overall works worse than EasyCache, but better in some rare cases AND universal compatibility with everything in ComfyUI.", - category="advanced/debug/model", + category="advanced/debug", is_experimental=True, inputs=[ io.Model.Input("model", tooltip="The model to add LazyCache to."), diff --git a/comfy_extras/nodes_edit_model.py b/comfy_extras/nodes_edit_model.py index 36da66f34..d0d20ae7a 100644 --- a/comfy_extras/nodes_edit_model.py +++ b/comfy_extras/nodes_edit_model.py @@ -8,7 +8,8 @@ class ReferenceLatent(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="ReferenceLatent", - category="advanced/conditioning/edit_models", + display_name="Set Reference Latent", + category="model/conditioning", description="This node sets the guiding latent for an edit model. 
If the model supports it you can chain multiple to set multiple reference images.", inputs=[ io.Conditioning.Input("conditioning"), diff --git a/comfy_extras/nodes_flux.py b/comfy_extras/nodes_flux.py index ef1757ae5..e9986c9e7 100644 --- a/comfy_extras/nodes_flux.py +++ b/comfy_extras/nodes_flux.py @@ -13,7 +13,7 @@ class CLIPTextEncodeFlux(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="CLIPTextEncodeFlux", - category="advanced/conditioning/flux", + category="model/conditioning/flux", inputs=[ io.Clip.Input("clip"), io.String.Input("clip_l", multiline=True, dynamic_prompts=True), @@ -40,7 +40,7 @@ class EmptyFlux2LatentImage(io.ComfyNode): return io.Schema( node_id="EmptyFlux2LatentImage", display_name="Empty Flux 2 Latent", - category="model/latent", + category="model/latent/flux", inputs=[ io.Int.Input("width", default=1024, min=16, max=nodes.MAX_RESOLUTION, step=16), io.Int.Input("height", default=1024, min=16, max=nodes.MAX_RESOLUTION, step=16), @@ -61,7 +61,7 @@ class FluxGuidance(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="FluxGuidance", - category="advanced/conditioning/flux", + category="model/conditioning/flux", inputs=[ io.Conditioning.Input("conditioning"), io.Float.Input("guidance", default=3.5, min=0.0, max=100.0, step=0.1), @@ -84,7 +84,7 @@ class FluxDisableGuidance(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="FluxDisableGuidance", - category="advanced/conditioning/flux", + category="model/conditioning/flux", description="This node completely disables the guidance embed on Flux and Flux like models", inputs=[ io.Conditioning.Input("conditioning"), @@ -128,7 +128,7 @@ class FluxKontextImageScale(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="FluxKontextImageScale", - category="advanced/conditioning/flux", + category="model/conditioning/flux", description="This node resizes the image to one that is more optimal for flux kontext.", inputs=[ io.Image.Input("image"), @@ 
-156,7 +156,7 @@ class FluxKontextMultiReferenceLatentMethod(io.ComfyNode): return io.Schema( node_id="FluxKontextMultiReferenceLatentMethod", display_name="Edit Model Reference Method", - category="advanced/conditioning/flux", + category="model/conditioning/flux", inputs=[ io.Conditioning.Input("conditioning"), io.Combo.Input( diff --git a/comfy_extras/nodes_hidream.py b/comfy_extras/nodes_hidream.py index e345fe51d..65248561b 100644 --- a/comfy_extras/nodes_hidream.py +++ b/comfy_extras/nodes_hidream.py @@ -11,8 +11,9 @@ class QuadrupleCLIPLoader(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="QuadrupleCLIPLoader", - category="advanced/loaders", - description="[Recipes]\n\nhidream: long clip-l, long clip-g, t5xxl, llama_8b_3.1_instruct", + display_name="Load CLIP (Quadruple)", + category="model/loaders", + description="Recipes:\nhidream: long clip-l, long clip-g, t5xxl, llama_8b_3.1_instruct", inputs=[ io.Combo.Input("clip_name1", options=folder_paths.get_filename_list("text_encoders")), io.Combo.Input("clip_name2", options=folder_paths.get_filename_list("text_encoders")), @@ -38,8 +39,9 @@ class CLIPTextEncodeHiDream(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="CLIPTextEncodeHiDream", + display_name="CLIP Text Encode (HiDream)", search_aliases=["hidream prompt"], - category="advanced/conditioning", + category="model/conditioning/hidream", inputs=[ io.Clip.Input("clip"), io.String.Input("clip_l", multiline=True, dynamic_prompts=True), diff --git a/comfy_extras/nodes_hidream_o1.py b/comfy_extras/nodes_hidream_o1.py index 8648d2e26..85693fce6 100644 --- a/comfy_extras/nodes_hidream_o1.py +++ b/comfy_extras/nodes_hidream_o1.py @@ -14,7 +14,7 @@ class EmptyHiDreamO1LatentImage(io.ComfyNode): return io.Schema( node_id="EmptyHiDreamO1LatentImage", display_name="Empty HiDream-O1 Latent Image", - category="model/latent/image", + category="model/latent/hidream", description=( "Empty pixel-space latent for HiDream-O1-Image. 
The model was " "trained at ~4 megapixels; lower resolutions go off-distribution " @@ -47,7 +47,7 @@ class HiDreamO1ReferenceImages(io.ComfyNode): return io.Schema( node_id="HiDreamO1ReferenceImages", display_name="HiDream-O1 Reference Images", - category="model/conditioning/image", + category="model/conditioning/hidream", description=( "Attach 1-10 reference images to conditioning, one for edit instruction" "or multiple for subject-driven personalization." @@ -117,7 +117,7 @@ class HiDreamO1PatchSeamSmoothing(io.ComfyNode): return io.Schema( node_id="HiDreamO1PatchSeamSmoothing", display_name="HiDream-O1 Patch Seam Smoothing", - category="advanced/model", + category="model/patch/hidream", is_experimental=True, description=( "Average the model output across multiple shifted patch-grid " diff --git a/comfy_extras/nodes_hunyuan.py b/comfy_extras/nodes_hunyuan.py index 16fff12af..8df2c8908 100644 --- a/comfy_extras/nodes_hunyuan.py +++ b/comfy_extras/nodes_hunyuan.py @@ -14,7 +14,8 @@ class CLIPTextEncodeHunyuanDiT(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="CLIPTextEncodeHunyuanDiT", - category="advanced/conditioning", + display_name="CLIP Text Encode (Hunyuan Image)", + category="model/conditioning/hunyuan image", inputs=[ io.Clip.Input("clip"), io.String.Input("bert", multiline=True, dynamic_prompts=True), @@ -41,7 +42,7 @@ class EmptyHunyuanLatentVideo(io.ComfyNode): return io.Schema( node_id="EmptyHunyuanLatentVideo", display_name="Empty HunyuanVideo 1.0 Latent", - category="model/latent/video", + category="model/latent/hunyuan video", inputs=[ io.Int.Input("width", default=848, min=16, max=nodes.MAX_RESOLUTION, step=16), io.Int.Input("height", default=480, min=16, max=nodes.MAX_RESOLUTION, step=16), @@ -67,6 +68,7 @@ class EmptyHunyuanVideo15Latent(EmptyHunyuanLatentVideo): schema = super().define_schema() schema.node_id = "EmptyHunyuanVideo15Latent" schema.display_name = "Empty HunyuanVideo 1.5 Latent" + schema.category = 
"model/latent/hunyuan video" return schema @classmethod @@ -81,7 +83,7 @@ class HunyuanVideo15ImageToVideo(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="HunyuanVideo15ImageToVideo", - category="model/conditioning/video_models", + category="model/conditioning/hunyuan video", inputs=[ io.Conditioning.Input("positive"), io.Conditioning.Input("negative"), @@ -132,7 +134,7 @@ class HunyuanVideo15SuperResolution(io.ComfyNode): return io.Schema( node_id="HunyuanVideo15SuperResolution", display_name="Hunyuan Video 1.5 Super Resolution", - category="model/conditioning/video_models", + category="model/conditioning/hunyuan video", inputs=[ io.Conditioning.Input("positive"), io.Conditioning.Input("negative"), @@ -227,7 +229,7 @@ class HunyuanVideo15LatentUpscaleWithModel(io.ComfyNode): return io.Schema( node_id="HunyuanVideo15LatentUpscaleWithModel", display_name="Hunyuan Video 15 Latent Upscale With Model", - category="model/latent", + category="model/latent/hunyhuan video", inputs=[ io.LatentUpscaleModel.Input("model"), io.Latent.Input("samples"), @@ -276,7 +278,7 @@ class TextEncodeHunyuanVideo_ImageToVideo(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="TextEncodeHunyuanVideo_ImageToVideo", - category="advanced/conditioning", + category="model/conditioning/hunyuan video", inputs=[ io.Clip.Input("clip"), io.ClipVisionOutput.Input("clip_vision_output"), @@ -308,7 +310,7 @@ class HunyuanImageToVideo(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="HunyuanImageToVideo", - category="model/conditioning/video_models", + category="model/conditioning/hunyuan video", inputs=[ io.Conditioning.Input("positive"), io.Vae.Input("vae"), @@ -359,7 +361,7 @@ class EmptyHunyuanImageLatent(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="EmptyHunyuanImageLatent", - category="model/latent", + category="model/latent/hunyuan image", inputs=[ io.Int.Input("width", default=2048, min=64, max=nodes.MAX_RESOLUTION, step=32), 
io.Int.Input("height", default=2048, min=64, max=nodes.MAX_RESOLUTION, step=32), @@ -384,7 +386,7 @@ class HunyuanRefinerLatent(io.ComfyNode): return io.Schema( node_id="HunyuanRefinerLatent", display_name="Hunyuan Latent Refiner", - category="model/conditioning/video_models", + category="model/conditioning/hunyuan video", inputs=[ io.Conditioning.Input("positive"), io.Conditioning.Input("negative"), diff --git a/comfy_extras/nodes_hunyuan3d.py b/comfy_extras/nodes_hunyuan3d.py index 60e530626..c5fa946cc 100644 --- a/comfy_extras/nodes_hunyuan3d.py +++ b/comfy_extras/nodes_hunyuan3d.py @@ -12,7 +12,7 @@ class EmptyLatentHunyuan3Dv2(IO.ComfyNode): def define_schema(cls): return IO.Schema( node_id="EmptyLatentHunyuan3Dv2", - category="model/latent/3d", + category="model/latent/hunyuan 3d", inputs=[ IO.Int.Input("resolution", default=3072, min=1, max=8192), IO.Int.Input("batch_size", default=1, min=1, max=4096, tooltip="The number of latent images in the batch."), @@ -35,7 +35,7 @@ class Hunyuan3Dv2Conditioning(IO.ComfyNode): def define_schema(cls): return IO.Schema( node_id="Hunyuan3Dv2Conditioning", - category="model/conditioning/3d_models", + category="model/conditioning/hunyuan 3d", inputs=[ IO.ClipVisionOutput.Input("clip_vision_output"), ], @@ -60,7 +60,7 @@ class Hunyuan3Dv2ConditioningMultiView(IO.ComfyNode): def define_schema(cls): return IO.Schema( node_id="Hunyuan3Dv2ConditioningMultiView", - category="model/conditioning/3d_models", + category="model/conditioning/hunyuan 3d", inputs=[ IO.ClipVisionOutput.Input("front", optional=True), IO.ClipVisionOutput.Input("left", optional=True), @@ -97,7 +97,7 @@ class VAEDecodeHunyuan3D(IO.ComfyNode): def define_schema(cls): return IO.Schema( node_id="VAEDecodeHunyuan3D", - category="model/latent/3d", + category="model/latent/hunyuan 3d", inputs=[ IO.Latent.Input("samples"), IO.Vae.Input("vae"), diff --git a/comfy_extras/nodes_ideogram4.py b/comfy_extras/nodes_ideogram4.py index d5827db4f..4070db17c 100644 --- 
a/comfy_extras/nodes_ideogram4.py +++ b/comfy_extras/nodes_ideogram4.py @@ -38,7 +38,7 @@ class Ideogram4Scheduler(io.ComfyNode): return io.Schema( node_id="Ideogram4Scheduler", display_name="Ideogram 4 Scheduler", - category="sampling/custom_sampling/schedulers", + category="model/sampling/schedulers", inputs=[ io.Int.Input("steps", default=20, min=1, max=200), io.Int.Input("width", default=1024, min=256, max=8192, step=16), diff --git a/comfy_extras/nodes_kandinsky5.py b/comfy_extras/nodes_kandinsky5.py index 015965498..96cca0386 100644 --- a/comfy_extras/nodes_kandinsky5.py +++ b/comfy_extras/nodes_kandinsky5.py @@ -13,7 +13,7 @@ class Kandinsky5ImageToVideo(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="Kandinsky5ImageToVideo", - category="model/conditioning/video_models", + category="model/conditioning/kandinsky", inputs=[ io.Conditioning.Input("positive"), io.Conditioning.Input("negative"), @@ -71,7 +71,7 @@ class NormalizeVideoLatentStart(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="NormalizeVideoLatentStart", - category="model/conditioning/video_models", + category="model/conditioning", description="Normalizes the initial frames of a video latent to match the mean and standard deviation of subsequent reference frames. 
Helps reduce differences between the starting frames and the rest of the video.", inputs=[ io.Latent.Input("latent"), @@ -104,8 +104,9 @@ class CLIPTextEncodeKandinsky5(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="CLIPTextEncodeKandinsky5", + display_name="CLIP Text Encode (Kandinsky 5)", search_aliases=["kandinsky prompt"], - category="advanced/conditioning/kandinsky5", + category="model/conditioning/kandinsky", inputs=[ io.Clip.Input("clip"), io.String.Input("clip_l", multiline=True, dynamic_prompts=True), diff --git a/comfy_extras/nodes_latent.py b/comfy_extras/nodes_latent.py index 32da9e8ac..1f93e34d6 100644 --- a/comfy_extras/nodes_latent.py +++ b/comfy_extras/nodes_latent.py @@ -262,6 +262,7 @@ class LatentBatch(io.ComfyNode): return io.Schema( node_id="LatentBatch", search_aliases=["combine latents", "merge latents", "join latents"], + display_name="Batch Latents (DEPRECATED)", category="model/latent/batch", is_deprecated=True, inputs=[ @@ -447,6 +448,7 @@ class ReplaceVideoLatentFrames(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="ReplaceVideoLatentFrames", + display_name="Replace Video Latent Frames", category="model/latent/batch", inputs=[ io.Latent.Input("destination", tooltip="The destination latent where frames will be replaced."), diff --git a/comfy_extras/nodes_lt.py b/comfy_extras/nodes_lt.py index 6d6078abe..85d76ecef 100644 --- a/comfy_extras/nodes_lt.py +++ b/comfy_extras/nodes_lt.py @@ -25,7 +25,7 @@ class GetICLoRAParameters(io.ComfyNode): display_name="Get IC-LoRA Parameters", description="Extracts IC-LoRA parameters from the safetensors metadata of a LoRA-loaded " "model and outputs them for LTXVAddGuide (eg. 
reference_downscale_factor).", - category="model/conditioning/video_models", + category="model/conditioning/ltxv", search_aliases=["ic-lora", "ic lora", "iclora", "downscale factor", "reference downscale"], inputs=[ io.Model.Input( @@ -62,7 +62,7 @@ class EmptyLTXVLatentVideo(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="EmptyLTXVLatentVideo", - category="model/latent/video/ltxv", + category="model/latent/ltxv", inputs=[ io.Int.Input("width", default=768, min=64, max=nodes.MAX_RESOLUTION, step=32), io.Int.Input("height", default=512, min=64, max=nodes.MAX_RESOLUTION, step=32), @@ -86,7 +86,7 @@ class LTXVImgToVideo(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="LTXVImgToVideo", - category="model/conditioning/video_models", + category="model/conditioning/ltxv", inputs=[ io.Conditioning.Input("positive"), io.Conditioning.Input("negative"), @@ -131,7 +131,7 @@ class LTXVImgToVideoInplace(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="LTXVImgToVideoInplace", - category="model/conditioning/video_models", + category="model/conditioning/ltxv", inputs=[ io.Vae.Input("vae"), io.Image.Input("image"), @@ -251,7 +251,7 @@ class LTXVAddGuide(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="LTXVAddGuide", - category="model/conditioning/video_models", + category="model/conditioning/ltxv", inputs=[ io.Conditioning.Input("positive"), io.Conditioning.Input("negative"), @@ -498,7 +498,7 @@ class LTXVCropGuides(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="LTXVCropGuides", - category="model/conditioning/video_models", + category="model/conditioning/ltxv", inputs=[ io.Conditioning.Input("positive"), io.Conditioning.Input("negative"), @@ -542,7 +542,7 @@ class LTXVConditioning(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="LTXVConditioning", - category="model/conditioning/video_models", + category="model/conditioning/ltxv", inputs=[ io.Conditioning.Input("positive"), 
io.Conditioning.Input("negative"), @@ -566,7 +566,7 @@ class ModelSamplingLTXV(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="ModelSamplingLTXV", - category="advanced/model", + category="model/patch/ltxv", inputs=[ io.Model.Input("model"), io.Float.Input("max_shift", default=2.05, min=0.0, max=100.0, step=0.01), @@ -746,7 +746,7 @@ class LTXVConcatAVLatent(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="LTXVConcatAVLatent", - category="model/latent/video/ltxv", + category="model/latent/ltxv", inputs=[ io.Latent.Input("video_latent"), io.Latent.Input("audio_latent"), @@ -781,7 +781,7 @@ class LTXVSeparateAVLatent(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="LTXVSeparateAVLatent", - category="model/latent/video/ltxv", + category="model/latent/ltxv", description="LTXV Separate AV Latent", inputs=[ io.Latent.Input("av_latent"), @@ -814,7 +814,7 @@ class LTXVReferenceAudio(io.ComfyNode): return io.Schema( node_id="LTXVReferenceAudio", display_name="LTXV Reference Audio (ID-LoRA)", - category="model/conditioning/audio", + category="model/conditioning/ltxv", description="Set reference audio for ID-LoRA speaker identity transfer. 
Encodes a reference audio clip into the conditioning and optionally patches the model with identity guidance (extra forward pass without reference, amplifying the speaker identity effect).", inputs=[ io.Model.Input("model"), diff --git a/comfy_extras/nodes_lt_audio.py b/comfy_extras/nodes_lt_audio.py index 052186083..2d774a0a3 100644 --- a/comfy_extras/nodes_lt_audio.py +++ b/comfy_extras/nodes_lt_audio.py @@ -40,7 +40,7 @@ class LTXVAudioVAEEncode(VAEEncodeAudio): return io.Schema( node_id="LTXVAudioVAEEncode", display_name="LTXV Audio VAE Encode", - category="model/latent/audio", + category="model/latent/ltxv", inputs=[ io.Audio.Input("audio", tooltip="The audio to be encoded."), io.Vae.Input( @@ -63,7 +63,7 @@ class LTXVAudioVAEDecode(io.ComfyNode): return io.Schema( node_id="LTXVAudioVAEDecode", display_name="LTXV Audio VAE Decode", - category="model/latent/audio", + category="model/latent/ltxv", inputs=[ io.Latent.Input("samples", tooltip="The latent to be decoded."), io.Vae.Input( @@ -96,7 +96,7 @@ class LTXVEmptyLatentAudio(io.ComfyNode): return io.Schema( node_id="LTXVEmptyLatentAudio", display_name="LTXV Empty Latent Audio", - category="model/latent/audio", + category="model/latent/ltxv", inputs=[ io.Int.Input( "frames_number", @@ -168,9 +168,9 @@ class LTXAVTextEncoderLoader(io.ComfyNode): def define_schema(cls) -> io.Schema: return io.Schema( node_id="LTXAVTextEncoderLoader", - display_name="LTXV Audio Text Encoder Loader", - category="advanced/loaders", - description="[Recipes]\n\nltxav: gemma 3 12B", + display_name="Load LTXV Audio Text Encoder", + category="model/loaders", + description="Recipes:\nltxav: gemma 3 12B", inputs=[ io.Combo.Input( "text_encoder", diff --git a/comfy_extras/nodes_lt_upsampler.py b/comfy_extras/nodes_lt_upsampler.py index be9a36e69..ef36109d1 100644 --- a/comfy_extras/nodes_lt_upsampler.py +++ b/comfy_extras/nodes_lt_upsampler.py @@ -13,7 +13,7 @@ class LTXVLatentUpsampler(IO.ComfyNode): def define_schema(cls): return 
IO.Schema( node_id="LTXVLatentUpsampler", - category="model/latent/video", + category="model/latent/ltxv", is_experimental=True, inputs=[ IO.Latent.Input("samples"), diff --git a/comfy_extras/nodes_lumina2.py b/comfy_extras/nodes_lumina2.py index c060a86a0..bc543c242 100644 --- a/comfy_extras/nodes_lumina2.py +++ b/comfy_extras/nodes_lumina2.py @@ -9,7 +9,7 @@ class RenormCFG(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="RenormCFG", - category="advanced/model", + category="model/patch", inputs=[ io.Model.Input("model"), io.Float.Input("cfg_trunc", default=100, min=0.0, max=100.0, step=0.01, advanced=True), @@ -80,8 +80,8 @@ class CLIPTextEncodeLumina2(io.ComfyNode): return io.Schema( node_id="CLIPTextEncodeLumina2", search_aliases=["lumina prompt"], - display_name="CLIP Text Encode for Lumina2", - category="model/conditioning", + display_name="CLIP Text Encode (Lumina 2)", + category="model/conditioning/lumina", description="Encodes a system prompt and a user prompt using a CLIP model into an embedding " "that can be used to guide the diffusion model towards generating specific images.", inputs=[ diff --git a/comfy_extras/nodes_mask.py b/comfy_extras/nodes_mask.py index 52484697a..76af338de 100644 --- a/comfy_extras/nodes_mask.py +++ b/comfy_extras/nodes_mask.py @@ -53,6 +53,7 @@ class LatentCompositeMasked(IO.ComfyNode): return IO.Schema( node_id="LatentCompositeMasked", search_aliases=["overlay latent", "layer latent", "paste latent", "inpaint latent"], + display_name="Latent Composite Masked", category="model/latent", inputs=[ IO.Latent.Input("destination"), diff --git a/comfy_extras/nodes_mochi.py b/comfy_extras/nodes_mochi.py index 3dcea6ab3..3aaf23e69 100644 --- a/comfy_extras/nodes_mochi.py +++ b/comfy_extras/nodes_mochi.py @@ -10,7 +10,7 @@ class EmptyMochiLatentVideo(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="EmptyMochiLatentVideo", - category="model/latent/video", + category="model/latent/mochi", inputs=[ 
io.Int.Input("width", default=848, min=16, max=nodes.MAX_RESOLUTION, step=16), io.Int.Input("height", default=480, min=16, max=nodes.MAX_RESOLUTION, step=16), diff --git a/comfy_extras/nodes_model_advanced.py b/comfy_extras/nodes_model_advanced.py index b27ac1296..a336ba079 100644 --- a/comfy_extras/nodes_model_advanced.py +++ b/comfy_extras/nodes_model_advanced.py @@ -59,7 +59,7 @@ class ModelSamplingDiscrete: RETURN_TYPES = ("MODEL",) FUNCTION = "patch" - CATEGORY = "advanced/model" + CATEGORY = "model/patch" def patch(self, model, sampling, zsnr): m = model.clone() @@ -97,7 +97,7 @@ class ModelSamplingStableCascade: RETURN_TYPES = ("MODEL",) FUNCTION = "patch" - CATEGORY = "advanced/model" + CATEGORY = "model/patch/stable cascade" def patch(self, model, shift): m = model.clone() @@ -123,7 +123,7 @@ class ModelSamplingSD3: RETURN_TYPES = ("MODEL",) FUNCTION = "patch" - CATEGORY = "advanced/model" + CATEGORY = "model/patch/stable diffusion" def patch(self, model, shift, multiplier=1000): m = model.clone() @@ -150,6 +150,7 @@ class ModelSamplingAuraFlow(ModelSamplingSD3): }} FUNCTION = "patch_aura" + CATEGORY = "model/patch" def patch_aura(self, model, shift): return self.patch(model, shift, multiplier=1.0) @@ -167,7 +168,7 @@ class ModelSamplingFlux: RETURN_TYPES = ("MODEL",) FUNCTION = "patch" - CATEGORY = "advanced/model" + CATEGORY = "model/patch/flux" def patch(self, model, max_shift, base_shift, width, height): m = model.clone() @@ -202,7 +203,7 @@ class ModelSamplingContinuousEDM: RETURN_TYPES = ("MODEL",) FUNCTION = "patch" - CATEGORY = "advanced/model" + CATEGORY = "model/patch" def patch(self, model, sampling, sigma_max, sigma_min): m = model.clone() @@ -247,7 +248,7 @@ class ModelSamplingContinuousV: RETURN_TYPES = ("MODEL",) FUNCTION = "patch" - CATEGORY = "advanced/model" + CATEGORY = "model/patch" def patch(self, model, sampling, sigma_max, sigma_min): m = model.clone() @@ -273,7 +274,7 @@ class RescaleCFG: RETURN_TYPES = ("MODEL",) FUNCTION = "patch" 
- CATEGORY = "advanced/model" + CATEGORY = "model/patch" def patch(self, model, multiplier): def rescale_cfg(args): @@ -314,7 +315,7 @@ class ModelNoiseScale: RETURN_TYPES = ("MODEL",) FUNCTION = "patch" - CATEGORY = "advanced/model" + CATEGORY = "model/patch" def patch(self, model, noise_scale): m = model.clone() @@ -337,7 +338,7 @@ class ModelComputeDtype: RETURN_TYPES = ("MODEL",) FUNCTION = "patch" - CATEGORY = "advanced/debug/model" + CATEGORY = "advanced/debug" def patch(self, model, dtype): m = model.clone() diff --git a/comfy_extras/nodes_model_merging.py b/comfy_extras/nodes_model_merging.py index b6b29e34a..962d2a0bb 100644 --- a/comfy_extras/nodes_model_merging.py +++ b/comfy_extras/nodes_model_merging.py @@ -21,7 +21,7 @@ class ModelMergeSimple: RETURN_TYPES = ("MODEL",) FUNCTION = "merge" - CATEGORY = "advanced/model_merging" + CATEGORY = "model/merging" def merge(self, model1, model2, ratio): m = model1.clone() @@ -40,7 +40,7 @@ class ModelSubtract: RETURN_TYPES = ("MODEL",) FUNCTION = "merge" - CATEGORY = "advanced/model_merging" + CATEGORY = "model/merging" def merge(self, model1, model2, multiplier): m = model1.clone() @@ -58,7 +58,7 @@ class ModelAdd: RETURN_TYPES = ("MODEL",) FUNCTION = "merge" - CATEGORY = "advanced/model_merging" + CATEGORY = "model/merging" def merge(self, model1, model2): m = model1.clone() @@ -78,7 +78,7 @@ class CLIPMergeSimple: RETURN_TYPES = ("CLIP",) FUNCTION = "merge" - CATEGORY = "advanced/model_merging" + CATEGORY = "model/merging" def merge(self, clip1, clip2, ratio): m = clip1.clone() @@ -101,7 +101,7 @@ class CLIPSubtract: RETURN_TYPES = ("CLIP",) FUNCTION = "merge" - CATEGORY = "advanced/model_merging" + CATEGORY = "model/merging" def merge(self, clip1, clip2, multiplier): m = clip1.clone() @@ -123,7 +123,7 @@ class CLIPAdd: RETURN_TYPES = ("CLIP",) FUNCTION = "merge" - CATEGORY = "advanced/model_merging" + CATEGORY = "model/merging" def merge(self, clip1, clip2): m = clip1.clone() @@ -147,7 +147,7 @@ class 
ModelMergeBlocks: RETURN_TYPES = ("MODEL",) FUNCTION = "merge" - CATEGORY = "advanced/model_merging" + CATEGORY = "model/merging" def merge(self, model1, model2, **kwargs): m = model1.clone() @@ -242,7 +242,7 @@ class CheckpointSave: FUNCTION = "save" OUTPUT_NODE = True - CATEGORY = "advanced/model_merging" + CATEGORY = "model/merging" def save(self, model, clip, vae, filename_prefix, prompt=None, extra_pnginfo=None): save_checkpoint(model, clip=clip, vae=vae, filename_prefix=filename_prefix, output_dir=self.output_dir, prompt=prompt, extra_pnginfo=extra_pnginfo) @@ -261,7 +261,7 @@ class CLIPSave: FUNCTION = "save" OUTPUT_NODE = True - CATEGORY = "advanced/model_merging" + CATEGORY = "model/merging" def save(self, clip, filename_prefix, prompt=None, extra_pnginfo=None): prompt_info = "" @@ -318,7 +318,7 @@ class VAESave: FUNCTION = "save" OUTPUT_NODE = True - CATEGORY = "advanced/model_merging" + CATEGORY = "model/merging" def save(self, vae, filename_prefix, prompt=None, extra_pnginfo=None): full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path(filename_prefix, self.output_dir) @@ -353,7 +353,7 @@ class ModelSave: FUNCTION = "save" OUTPUT_NODE = True - CATEGORY = "advanced/model_merging" + CATEGORY = "model/merging" def save(self, model, filename_prefix, prompt=None, extra_pnginfo=None): save_checkpoint(model, filename_prefix=filename_prefix, output_dir=self.output_dir, prompt=prompt, extra_pnginfo=extra_pnginfo) diff --git a/comfy_extras/nodes_model_merging_model_specific.py b/comfy_extras/nodes_model_merging_model_specific.py index 55eb3ccfe..2fa684b3a 100644 --- a/comfy_extras/nodes_model_merging_model_specific.py +++ b/comfy_extras/nodes_model_merging_model_specific.py @@ -1,7 +1,7 @@ import comfy_extras.nodes_model_merging class ModelMergeSD1(comfy_extras.nodes_model_merging.ModelMergeBlocks): - CATEGORY = "advanced/model_merging/model_specific" + CATEGORY = "model/merging/model specific" @classmethod def 
INPUT_TYPES(s): arg_dict = { "model1": ("MODEL",), @@ -27,7 +27,7 @@ class ModelMergeSD1(comfy_extras.nodes_model_merging.ModelMergeBlocks): class ModelMergeSDXL(comfy_extras.nodes_model_merging.ModelMergeBlocks): - CATEGORY = "advanced/model_merging/model_specific" + CATEGORY = "model/merging/model specific" @classmethod def INPUT_TYPES(s): @@ -53,7 +53,7 @@ class ModelMergeSDXL(comfy_extras.nodes_model_merging.ModelMergeBlocks): return {"required": arg_dict} class ModelMergeSD3_2B(comfy_extras.nodes_model_merging.ModelMergeBlocks): - CATEGORY = "advanced/model_merging/model_specific" + CATEGORY = "model/merging/model specific" @classmethod def INPUT_TYPES(s): @@ -77,7 +77,7 @@ class ModelMergeSD3_2B(comfy_extras.nodes_model_merging.ModelMergeBlocks): class ModelMergeAuraflow(comfy_extras.nodes_model_merging.ModelMergeBlocks): - CATEGORY = "advanced/model_merging/model_specific" + CATEGORY = "model/merging/model specific" @classmethod def INPUT_TYPES(s): @@ -104,7 +104,7 @@ class ModelMergeAuraflow(comfy_extras.nodes_model_merging.ModelMergeBlocks): return {"required": arg_dict} class ModelMergeFlux1(comfy_extras.nodes_model_merging.ModelMergeBlocks): - CATEGORY = "advanced/model_merging/model_specific" + CATEGORY = "model/merging/model specific" @classmethod def INPUT_TYPES(s): @@ -130,7 +130,7 @@ class ModelMergeFlux1(comfy_extras.nodes_model_merging.ModelMergeBlocks): return {"required": arg_dict} class ModelMergeSD35_Large(comfy_extras.nodes_model_merging.ModelMergeBlocks): - CATEGORY = "advanced/model_merging/model_specific" + CATEGORY = "model/merging/model specific" @classmethod def INPUT_TYPES(s): @@ -153,7 +153,7 @@ class ModelMergeSD35_Large(comfy_extras.nodes_model_merging.ModelMergeBlocks): return {"required": arg_dict} class ModelMergeMochiPreview(comfy_extras.nodes_model_merging.ModelMergeBlocks): - CATEGORY = "advanced/model_merging/model_specific" + CATEGORY = "model/merging/model specific" @classmethod def INPUT_TYPES(s): @@ -175,7 +175,7 @@ class 
ModelMergeMochiPreview(comfy_extras.nodes_model_merging.ModelMergeBlocks): return {"required": arg_dict} class ModelMergeLTXV(comfy_extras.nodes_model_merging.ModelMergeBlocks): - CATEGORY = "advanced/model_merging/model_specific" + CATEGORY = "model/merging/model specific" @classmethod def INPUT_TYPES(s): @@ -197,7 +197,7 @@ class ModelMergeLTXV(comfy_extras.nodes_model_merging.ModelMergeBlocks): return {"required": arg_dict} class ModelMergeCosmos7B(comfy_extras.nodes_model_merging.ModelMergeBlocks): - CATEGORY = "advanced/model_merging/model_specific" + CATEGORY = "model/merging/model specific" @classmethod def INPUT_TYPES(s): @@ -221,7 +221,7 @@ class ModelMergeCosmos7B(comfy_extras.nodes_model_merging.ModelMergeBlocks): return {"required": arg_dict} class ModelMergeCosmos14B(comfy_extras.nodes_model_merging.ModelMergeBlocks): - CATEGORY = "advanced/model_merging/model_specific" + CATEGORY = "model/merging/model specific" @classmethod def INPUT_TYPES(s): @@ -245,7 +245,7 @@ class ModelMergeCosmos14B(comfy_extras.nodes_model_merging.ModelMergeBlocks): return {"required": arg_dict} class ModelMergeWAN2_1(comfy_extras.nodes_model_merging.ModelMergeBlocks): - CATEGORY = "advanced/model_merging/model_specific" + CATEGORY = "model/merging/model specific" DESCRIPTION = "1.3B model has 30 blocks, 14B model has 40 blocks. Image to video model has the extra img_emb." 
@classmethod @@ -269,7 +269,7 @@ class ModelMergeWAN2_1(comfy_extras.nodes_model_merging.ModelMergeBlocks): return {"required": arg_dict} class ModelMergeCosmosPredict2_2B(comfy_extras.nodes_model_merging.ModelMergeBlocks): - CATEGORY = "advanced/model_merging/model_specific" + CATEGORY = "model/merging/model specific" @classmethod def INPUT_TYPES(s): @@ -292,7 +292,7 @@ class ModelMergeCosmosPredict2_2B(comfy_extras.nodes_model_merging.ModelMergeBlo return {"required": arg_dict} class ModelMergeCosmosPredict2_14B(comfy_extras.nodes_model_merging.ModelMergeBlocks): - CATEGORY = "advanced/model_merging/model_specific" + CATEGORY = "model/merging/model specific" @classmethod def INPUT_TYPES(s): @@ -315,7 +315,7 @@ class ModelMergeCosmosPredict2_14B(comfy_extras.nodes_model_merging.ModelMergeBl return {"required": arg_dict} class ModelMergeQwenImage(comfy_extras.nodes_model_merging.ModelMergeBlocks): - CATEGORY = "advanced/model_merging/model_specific" + CATEGORY = "model/merging/model specific" @classmethod def INPUT_TYPES(s): diff --git a/comfy_extras/nodes_model_patch.py b/comfy_extras/nodes_model_patch.py index bdccbf8c4..3f785c8b5 100644 --- a/comfy_extras/nodes_model_patch.py +++ b/comfy_extras/nodes_model_patch.py @@ -232,7 +232,7 @@ class ModelPatchLoader: FUNCTION = "load_model_patch" EXPERIMENTAL = True - CATEGORY = "advanced/loaders" + CATEGORY = "model/loaders" def load_model_patch(self, name): model_patch_path = folder_paths.get_full_path_or_raise("model_patches", name) @@ -479,7 +479,7 @@ class QwenImageDiffsynthControlnet: FUNCTION = "diffsynth_controlnet" EXPERIMENTAL = True - CATEGORY = "advanced/loaders/qwen" + CATEGORY = "model/patch/qwen" def diffsynth_controlnet(self, model, model_patch, vae, image=None, strength=1.0, inpaint_image=None, mask=None): model_patched = model.clone() @@ -512,7 +512,7 @@ class ZImageFunControlnet(QwenImageDiffsynthControlnet): }, "optional": {"image": ("IMAGE",), "inpaint_image": ("IMAGE",), "mask": ("MASK",)}} - 
CATEGORY = "advanced/loaders/zimage" + CATEGORY = "model/patch/z-image" class UsoStyleProjectorPatch: def __init__(self, model_patch, encoded_image): @@ -675,3 +675,11 @@ NODE_CLASS_MAPPINGS = { "USOStyleReference": USOStyleReference, "SUPIRApply": SUPIRApply, } + +NODE_DISPLAY_NAME_MAPPINGS = { + "ModelPatchLoader": "Load Model Patch", + "QwenImageDiffsynthControlnet": "Apply Qwen Image DiffSynth ControlNet", + "ZImageFunControlnet": "Apply Z-Image Fun ControlNet", + "USOStyleReference": "Apply USO Style Reference", + "SUPIRApply": "Apply SUPIR Patch", +} diff --git a/comfy_extras/nodes_pid.py b/comfy_extras/nodes_pid.py index 71855254e..a3ffd9671 100644 --- a/comfy_extras/nodes_pid.py +++ b/comfy_extras/nodes_pid.py @@ -14,10 +14,8 @@ class PiDConditioning(io.ComfyNode): return io.Schema( node_id="PiDConditioning", display_name="PiD Conditioning", - category="advanced/conditioning", - description=( - "Attaches a latent and a degrade_sigma scalar to a CONDITIONING for PiD decoding/upscaling" - ), + category="model/conditioning", + description=("Attaches a latent and a degrade_sigma scalar to a CONDITIONING for PiD decoding/upscaling"), inputs=[ io.Conditioning.Input("positive"), io.Latent.Input("latent", tooltip="latent (from VAEEncode or a KSampler)."), diff --git a/comfy_extras/nodes_pixart.py b/comfy_extras/nodes_pixart.py index 2f1b73e60..f878a33b5 100644 --- a/comfy_extras/nodes_pixart.py +++ b/comfy_extras/nodes_pixart.py @@ -7,8 +7,9 @@ class CLIPTextEncodePixArtAlpha(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="CLIPTextEncodePixArtAlpha", + display_name="CLIP Text Encode (PixArt Alpha)", search_aliases=["pixart prompt"], - category="advanced/conditioning", + category="model/conditioning/pixart", description="Encodes text and sets the resolution conditioning for PixArt Alpha. 
Does not apply to PixArt Sigma.", inputs=[ io.Int.Input("width", default=1024, min=0, max=nodes.MAX_RESOLUTION), diff --git a/comfy_extras/nodes_post_processing.py b/comfy_extras/nodes_post_processing.py index 3e440433e..763b8a52f 100644 --- a/comfy_extras/nodes_post_processing.py +++ b/comfy_extras/nodes_post_processing.py @@ -616,7 +616,7 @@ class BatchLatentsNode(io.ComfyNode): node_id="BatchLatentsNode", search_aliases=["combine latents", "stack latents", "merge latents"], display_name="Batch Latents", - category="model/latent", + category="model/latent/batch", inputs=[ io.Autogrow.Input("latents", template=autogrow_template) ], diff --git a/comfy_extras/nodes_qwen.py b/comfy_extras/nodes_qwen.py index 5b92814a4..4960774db 100644 --- a/comfy_extras/nodes_qwen.py +++ b/comfy_extras/nodes_qwen.py @@ -12,7 +12,7 @@ class TextEncodeQwenImageEdit(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="TextEncodeQwenImageEdit", - category="advanced/conditioning", + category="model/conditioning/qwen image", inputs=[ io.Clip.Input("clip"), io.String.Input("prompt", multiline=True, dynamic_prompts=True), @@ -55,7 +55,7 @@ class TextEncodeQwenImageEditPlus(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="TextEncodeQwenImageEditPlus", - category="advanced/conditioning", + category="model/conditioning/qwen image", inputs=[ io.Clip.Input("clip"), io.String.Input("prompt", multiline=True, dynamic_prompts=True), diff --git a/comfy_extras/nodes_scail.py b/comfy_extras/nodes_scail.py index bba0942d7..007733efc 100644 --- a/comfy_extras/nodes_scail.py +++ b/comfy_extras/nodes_scail.py @@ -123,7 +123,7 @@ class WanSCAILToVideo(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="WanSCAILToVideo", - category="model/conditioning/video_models", + category="model/conditioning/wan/scail", inputs=[ io.Conditioning.Input("positive"), io.Conditioning.Input("negative"), @@ -257,18 +257,16 @@ class SCAIL2ColoredMask(io.ComfyNode): return io.Schema( 
node_id="SCAIL2ColoredMask", display_name="Create SCAIL-2 Colored Mask", - category="conditioning/video_models/scail", + category="model/conditioning/wan/scail", inputs=[ SAM3TrackData.Input("driving_track_data", tooltip="SAM3 track of the driving pose video. Will be rendered into the pose_video_mask output."), - SAM3TrackData.Input("ref_track_data", optional=True, - tooltip="SAM3 track of the reference image."), - io.String.Input("object_indices", default="", - tooltip="Comma-separated list of person indices to include (e.g. '0,2,3'). Applied to both reference and pose video masks. Empty = all."), + SAM3TrackData.Input("ref_track_data", optional=True, tooltip="SAM3 track of the reference image."), + io.String.Input("object_indices", default="", tooltip="Comma-separated list of person indices to include (e.g. '0,2,3'). Applied to both reference and pose video masks. Empty = all."), io.Combo.Input("sort_by", options=["none", "left_to_right", "area"], default="left_to_right", - tooltip="Order in which palette colors are assigned to the tracked objects (applied to both reference and pose video so each identity keeps the same color). left_to_right = leftmost object (by first-frame centroid) gets the first color; area = biggest object (by first-frame mask area) gets the first color; none = keep SAM3's order."), + tooltip="Order in which palette colors are assigned to the tracked objects (applied to both reference and pose video so each identity keeps the same color). left_to_right = leftmost object (by first-frame centroid) gets the first color; area = biggest object (by first-frame mask area) gets the first color; none = keep SAM3's order."), io.Boolean.Input("replacement_mode", default=False, - tooltip="False = Animation Mode (pose_video_mask has black background, reference_image_mask has white background). 
" - "True = Replacement Mode (pose_video_mask has white background, reference_image_mask has black background)."), + tooltip="False = Animation Mode (pose_video_mask has black background, reference_image_mask has white background). " + "True = Replacement Mode (pose_video_mask has white background, reference_image_mask has black background)."), ], outputs=[ io.Image.Output("pose_video_mask"), diff --git a/comfy_extras/nodes_sd3.py b/comfy_extras/nodes_sd3.py index 38cbf117b..40e84656b 100644 --- a/comfy_extras/nodes_sd3.py +++ b/comfy_extras/nodes_sd3.py @@ -13,8 +13,9 @@ class TripleCLIPLoader(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="TripleCLIPLoader", - category="advanced/loaders", - description="[Recipes]\n\nsd3: clip-l, clip-g, t5", + display_name="Load CLIP (Triple)", + category="model/loaders", + description="Recipes:\nsd3: clip-l, clip-g, t5", inputs=[ io.Combo.Input("clip_name1", options=folder_paths.get_filename_list("text_encoders")), io.Combo.Input("clip_name2", options=folder_paths.get_filename_list("text_encoders")), @@ -41,7 +42,7 @@ class EmptySD3LatentImage(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="EmptySD3LatentImage", - category="model/latent/sd3", + category="model/latent/stable diffusion", inputs=[ io.Int.Input("width", default=1024, min=16, max=nodes.MAX_RESOLUTION, step=16), io.Int.Input("height", default=1024, min=16, max=nodes.MAX_RESOLUTION, step=16), @@ -66,7 +67,8 @@ class CLIPTextEncodeSD3(io.ComfyNode): return io.Schema( node_id="CLIPTextEncodeSD3", search_aliases=["sd3 prompt"], - category="advanced/conditioning", + display_name="CLIP Text Encode (SD3)", + category="model/conditioning/stable diffusion", inputs=[ io.Clip.Input("clip"), io.String.Input("clip_l", multiline=True, dynamic_prompts=True), diff --git a/comfy_extras/nodes_sdupscale.py b/comfy_extras/nodes_sdupscale.py index ea283e971..5c247fb49 100644 --- a/comfy_extras/nodes_sdupscale.py +++ b/comfy_extras/nodes_sdupscale.py @@ 
-9,7 +9,7 @@ class SD_4XUpscale_Conditioning(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="SD_4XUpscale_Conditioning", - category="model/conditioning/upscale_diffusion", + category="model/conditioning/stable diffusion upscaler", inputs=[ io.Image.Input("images"), io.Conditioning.Input("positive"), diff --git a/comfy_extras/nodes_stable3d.py b/comfy_extras/nodes_stable3d.py index 8a6e5b726..b0eba819b 100644 --- a/comfy_extras/nodes_stable3d.py +++ b/comfy_extras/nodes_stable3d.py @@ -27,7 +27,7 @@ class StableZero123_Conditioning(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="StableZero123_Conditioning", - category="model/conditioning/3d_models", + category="model/conditioning/stable zero123", inputs=[ io.ClipVision.Input("clip_vision"), io.Image.Input("init_image"), @@ -65,7 +65,7 @@ class StableZero123_Conditioning_Batched(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="StableZero123_Conditioning_Batched", - category="model/conditioning/3d_models", + category="model/conditioning/stable zero123", inputs=[ io.ClipVision.Input("clip_vision"), io.Image.Input("init_image"), @@ -112,7 +112,7 @@ class SV3D_Conditioning(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="SV3D_Conditioning", - category="model/conditioning/3d_models", + category="model/conditioning/stable video 3d", inputs=[ io.ClipVision.Input("clip_vision"), io.Image.Input("init_image"), diff --git a/comfy_extras/nodes_stable_cascade.py b/comfy_extras/nodes_stable_cascade.py index e55f248ae..6a78ffb47 100644 --- a/comfy_extras/nodes_stable_cascade.py +++ b/comfy_extras/nodes_stable_cascade.py @@ -29,7 +29,7 @@ class StableCascade_EmptyLatentImage(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="StableCascade_EmptyLatentImage", - category="model/latent/stable_cascade", + category="model/latent/stable cascade", inputs=[ io.Int.Input("width", default=1024, min=256, max=nodes.MAX_RESOLUTION, step=8), io.Int.Input("height", 
default=1024, min=256, max=nodes.MAX_RESOLUTION, step=8), @@ -58,7 +58,7 @@ class StableCascade_StageC_VAEEncode(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="StableCascade_StageC_VAEEncode", - category="model/latent/stable_cascade", + category="model/latent/stable cascade", inputs=[ io.Image.Input("image"), io.Vae.Input("vae"), @@ -93,7 +93,7 @@ class StableCascade_StageB_Conditioning(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="StableCascade_StageB_Conditioning", - category="model/conditioning/stable_cascade", + category="model/conditioning/stable cascade", inputs=[ io.Conditioning.Input("conditioning"), io.Latent.Input("stage_c"), diff --git a/comfy_extras/nodes_train.py b/comfy_extras/nodes_train.py index bb68da6fa..a27217b80 100644 --- a/comfy_extras/nodes_train.py +++ b/comfy_extras/nodes_train.py @@ -1367,7 +1367,7 @@ class SaveLoRA(io.ComfyNode): node_id="SaveLoRA", search_aliases=["export lora"], display_name="Save LoRA Weights", - category="advanced/model_merging", + category="model/merging", is_experimental=True, is_output_node=True, inputs=[ diff --git a/comfy_extras/nodes_video_model.py b/comfy_extras/nodes_video_model.py index 0d6cae6a8..01d48d4d4 100644 --- a/comfy_extras/nodes_video_model.py +++ b/comfy_extras/nodes_video_model.py @@ -41,7 +41,7 @@ class SVD_img2vid_Conditioning: FUNCTION = "encode" - CATEGORY = "model/conditioning/video_models" + CATEGORY = "model/conditioning/stable video" def encode(self, clip_vision, init_image, vae, width, height, video_frames, motion_bucket_id, fps, augmentation_level): output = clip_vision.encode_image(init_image) @@ -108,7 +108,7 @@ class VideoTriangleCFGGuidance: return (m, ) class ImageOnlyCheckpointSave(comfy_extras.nodes_model_merging.CheckpointSave): - CATEGORY = "advanced/model_merging" + CATEGORY = "model/merging" @classmethod def INPUT_TYPES(s): @@ -138,7 +138,7 @@ class ConditioningSetAreaPercentageVideo: RETURN_TYPES = ("CONDITIONING",) FUNCTION = "append" - 
CATEGORY = "model/conditioning" + CATEGORY = "model/conditioning/transform" def append(self, conditioning, width, height, temporal, x, y, z, strength): c = node_helpers.conditioning_set_values(conditioning, {"area": ("percentage", temporal, height, width, z, y, x), @@ -160,4 +160,5 @@ NODE_DISPLAY_NAME_MAPPINGS = { "ImageOnlyCheckpointLoader": "Load Checkpoint Image Only (img2vid model)", "VideoLinearCFGGuidance": "Video Linear CFG Guidance", "VideoTriangleCFGGuidance": "Video Triangle CFG Guidance", + "ConditioningSetAreaPercentageVideo": "Conditioning (Set Area with Percentage for Video)", } diff --git a/comfy_extras/nodes_void.py b/comfy_extras/nodes_void.py index b43154b8d..7527baf43 100644 --- a/comfy_extras/nodes_void.py +++ b/comfy_extras/nodes_void.py @@ -175,7 +175,7 @@ class VOIDInpaintConditioning(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="VOIDInpaintConditioning", - category="model/conditioning/video_models", + category="model/conditioning/void", inputs=[ io.Conditioning.Input("positive"), io.Conditioning.Input("negative"), @@ -288,7 +288,7 @@ class VOIDWarpedNoise(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="VOIDWarpedNoise", - category="model/latent/video", + category="model/latent/void", inputs=[ OpticalFlow.Input( "optical_flow", @@ -393,7 +393,7 @@ class VOIDWarpedNoiseSource(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="VOIDWarpedNoiseSource", - category="model/sampling/noise", + category="model/latent/void", inputs=[ io.Latent.Input("warped_noise", tooltip="Warped noise latent from VOIDWarpedNoise"), diff --git a/comfy_extras/nodes_wan.py b/comfy_extras/nodes_wan.py index d73be8e00..0e47a58df 100644 --- a/comfy_extras/nodes_wan.py +++ b/comfy_extras/nodes_wan.py @@ -18,7 +18,7 @@ class WanImageToVideo(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="WanImageToVideo", - category="model/conditioning/video_models", + category="model/conditioning/wan", inputs=[ 
io.Conditioning.Input("positive"), io.Conditioning.Input("negative"), @@ -66,7 +66,7 @@ class WanFunControlToVideo(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="WanFunControlToVideo", - category="model/conditioning/video_models", + category="model/conditioning/wan/fun control", inputs=[ io.Conditioning.Input("positive"), io.Conditioning.Input("negative"), @@ -119,7 +119,7 @@ class Wan22FunControlToVideo(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="Wan22FunControlToVideo", - category="model/conditioning/video_models", + category="model/conditioning/wan/fun control", inputs=[ io.Conditioning.Input("positive"), io.Conditioning.Input("negative"), @@ -184,7 +184,7 @@ class WanFirstLastFrameToVideo(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="WanFirstLastFrameToVideo", - category="model/conditioning/video_models", + category="model/conditioning/wan", inputs=[ io.Conditioning.Input("positive"), io.Conditioning.Input("negative"), @@ -256,7 +256,7 @@ class WanFunInpaintToVideo(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="WanFunInpaintToVideo", - category="model/conditioning/video_models", + category="model/conditioning/wan/fun inpaint", inputs=[ io.Conditioning.Input("positive"), io.Conditioning.Input("negative"), @@ -288,7 +288,7 @@ class WanVaceToVideo(io.ComfyNode): return io.Schema( node_id="WanVaceToVideo", search_aliases=["video conditioning", "video control"], - category="model/conditioning/video_models", + category="model/conditioning/wan/vace", inputs=[ io.Conditioning.Input("positive"), io.Conditioning.Input("negative"), @@ -375,7 +375,8 @@ class TrimVideoLatent(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="TrimVideoLatent", - category="model/latent/video", + display_name="Trim Video Latent", + category="model/latent", inputs=[ io.Latent.Input("samples"), io.Int.Input("trim_amount", default=0, min=0, max=99999), @@ -398,7 +399,7 @@ class 
WanCameraImageToVideo(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="WanCameraImageToVideo", - category="model/conditioning/video_models", + category="model/conditioning/wan/camera", inputs=[ io.Conditioning.Input("positive"), io.Conditioning.Input("negative"), @@ -452,7 +453,7 @@ class WanPhantomSubjectToVideo(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="WanPhantomSubjectToVideo", - category="model/conditioning/video_models", + category="model/conditioning/wan/phantom subject", inputs=[ io.Conditioning.Input("positive"), io.Conditioning.Input("negative"), @@ -707,7 +708,7 @@ class WanTrackToVideo(io.ComfyNode): return io.Schema( node_id="WanTrackToVideo", search_aliases=["motion tracking", "trajectory video", "point tracking", "keypoint animation"], - category="model/conditioning/video_models", + category="model/conditioning/wan/move", inputs=[ io.Conditioning.Input("positive"), io.Conditioning.Input("negative"), @@ -951,7 +952,7 @@ class WanSoundImageToVideo(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="WanSoundImageToVideo", - category="model/conditioning/video_models", + category="model/conditioning/wan/sound", inputs=[ io.Conditioning.Input("positive"), io.Conditioning.Input("negative"), @@ -984,7 +985,7 @@ class WanSoundImageToVideoExtend(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="WanSoundImageToVideoExtend", - category="model/conditioning/video_models", + category="model/conditioning/wan/sound", inputs=[ io.Conditioning.Input("positive"), io.Conditioning.Input("negative"), @@ -1046,7 +1047,7 @@ class WanHuMoImageToVideo(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="WanHuMoImageToVideo", - category="model/conditioning/video_models", + category="model/conditioning/wan/humo", inputs=[ io.Conditioning.Input("positive"), io.Conditioning.Input("negative"), @@ -1112,7 +1113,7 @@ class WanAnimateToVideo(io.ComfyNode): def define_schema(cls): return io.Schema( 
node_id="WanAnimateToVideo", - category="model/conditioning/video_models", + category="model/conditioning/wan/animate", inputs=[ io.Conditioning.Input("positive"), io.Conditioning.Input("negative"), @@ -1252,7 +1253,7 @@ class Wan22ImageToVideoLatent(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="Wan22ImageToVideoLatent", - category="model/conditioning/inpaint", + category="model/conditioning/wan", inputs=[ io.Vae.Input("vae"), io.Int.Input("width", default=1280, min=32, max=nodes.MAX_RESOLUTION, step=32), @@ -1302,7 +1303,7 @@ class WanInfiniteTalkToVideo(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="WanInfiniteTalkToVideo", - category="model/conditioning/video_models", + category="model/conditioning/wan/infinite talk", inputs=[ io.DynamicCombo.Input("mode", options=[ io.DynamicCombo.Option("single_speaker", []), diff --git a/comfy_extras/nodes_wandancer.py b/comfy_extras/nodes_wandancer.py index a96885745..fdb2b5e57 100644 --- a/comfy_extras/nodes_wandancer.py +++ b/comfy_extras/nodes_wandancer.py @@ -713,7 +713,7 @@ class WanDancerEncodeAudio(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="WanDancerEncodeAudio", - category="model/conditioning/video_models", + category="model/conditioning/wan/dancer", inputs=[ io.Audio.Input("audio"), io.Int.Input("video_frames", default=149, min=1, max=nodes.MAX_RESOLUTION, step=4), @@ -787,7 +787,7 @@ class WanDancerVideo(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="WanDancerVideo", - category="model/conditioning/video_models", + category="model/conditioning/wan/dancer", inputs=[ io.Conditioning.Input("positive"), io.Conditioning.Input("negative"), diff --git a/comfy_extras/nodes_wanmove.py b/comfy_extras/nodes_wanmove.py index 2db064922..d1f924a40 100644 --- a/comfy_extras/nodes_wanmove.py +++ b/comfy_extras/nodes_wanmove.py @@ -247,7 +247,7 @@ class WanMoveVisualizeTracks(io.ComfyNode): def define_schema(cls): return io.Schema( 
node_id="WanMoveVisualizeTracks", - category="model/conditioning/video_models", + category="model/conditioning/wan/move", inputs=[ io.Image.Input("images"), io.Tracks.Input("tracks", optional=True), @@ -283,7 +283,7 @@ class WanMoveTracksFromCoords(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="WanMoveTracksFromCoords", - category="model/conditioning/video_models", + category="model/conditioning/wan/move", inputs=[ io.String.Input("track_coords", force_input=True, default="[]", optional=True), io.Mask.Input("track_mask", optional=True), @@ -325,7 +325,8 @@ class GenerateTracks(io.ComfyNode): return io.Schema( node_id="GenerateTracks", search_aliases=["motion paths", "camera movement", "trajectory"], - category="model/conditioning/video_models", + display_name="Generate Video Tracks", + category="model/conditioning/wan/move", inputs=[ io.Int.Input("width", default=832, min=16, max=4096, step=16), io.Int.Input("height", default=480, min=16, max=4096, step=16), @@ -434,7 +435,7 @@ class WanMoveConcatTrack(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="WanMoveConcatTrack", - category="model/conditioning/video_models", + category="model/conditioning/wan/move", inputs=[ io.Tracks.Input("tracks_1"), io.Tracks.Input("tracks_2", optional=True), @@ -463,7 +464,7 @@ class WanMoveTrackToVideo(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="WanMoveTrackToVideo", - category="model/conditioning/video_models", + category="model/conditioning/wan/move", inputs=[ io.Conditioning.Input("positive"), io.Conditioning.Input("negative"), diff --git a/comfy_extras/nodes_zimage.py b/comfy_extras/nodes_zimage.py index 70ddc4afa..ce946b377 100644 --- a/comfy_extras/nodes_zimage.py +++ b/comfy_extras/nodes_zimage.py @@ -10,7 +10,7 @@ class TextEncodeZImageOmni(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="TextEncodeZImageOmni", - category="advanced/conditioning", + category="model/conditioning/z-image", 
is_experimental=True, inputs=[ io.Clip.Input("clip"), diff --git a/nodes.py b/nodes.py index 0d422d418..bb4649478 100644 --- a/nodes.py +++ b/nodes.py @@ -87,7 +87,7 @@ class ConditioningCombine: RETURN_TYPES = ("CONDITIONING",) FUNCTION = "combine" - CATEGORY = "model/conditioning" + CATEGORY = "model/conditioning/transform" SEARCH_ALIASES = ["combine", "merge conditioning", "combine prompts", "merge prompts", "mix prompts", "add prompt"] def combine(self, conditioning_1, conditioning_2): @@ -104,7 +104,7 @@ class ConditioningAverage : RETURN_TYPES = ("CONDITIONING",) FUNCTION = "addWeighted" - CATEGORY = "model/conditioning" + CATEGORY = "model/conditioning/transform" def addWeighted(self, conditioning_to, conditioning_from, conditioning_to_strength): out = [] @@ -143,7 +143,7 @@ class ConditioningConcat: RETURN_TYPES = ("CONDITIONING",) FUNCTION = "concat" - CATEGORY = "model/conditioning" + CATEGORY = "model/conditioning/transform" def concat(self, conditioning_to, conditioning_from): out = [] @@ -176,7 +176,7 @@ class ConditioningSetArea: RETURN_TYPES = ("CONDITIONING",) FUNCTION = "append" - CATEGORY = "model/conditioning" + CATEGORY = "model/conditioning/transform" def append(self, conditioning, width, height, x, y, strength): c = node_helpers.conditioning_set_values(conditioning, {"area": (height // 8, width // 8, y // 8, x // 8), @@ -197,7 +197,7 @@ class ConditioningSetAreaPercentage: RETURN_TYPES = ("CONDITIONING",) FUNCTION = "append" - CATEGORY = "model/conditioning" + CATEGORY = "model/conditioning/transform" def append(self, conditioning, width, height, x, y, strength): c = node_helpers.conditioning_set_values(conditioning, {"area": ("percentage", height, width, y, x), @@ -214,7 +214,7 @@ class ConditioningSetAreaStrength: RETURN_TYPES = ("CONDITIONING",) FUNCTION = "append" - CATEGORY = "model/conditioning" + CATEGORY = "model/conditioning/transform" def append(self, conditioning, strength): c = node_helpers.conditioning_set_values(conditioning, 
{"strength": strength}) @@ -234,7 +234,7 @@ class ConditioningSetMask: RETURN_TYPES = ("CONDITIONING",) FUNCTION = "append" - CATEGORY = "model/conditioning" + CATEGORY = "model/conditioning/transform" def append(self, conditioning, mask, set_cond_area, strength): set_area_to_bounds = False @@ -257,7 +257,7 @@ class ConditioningZeroOut: RETURN_TYPES = ("CONDITIONING",) FUNCTION = "zero_out" - CATEGORY = "advanced/conditioning" + CATEGORY = "model/conditioning/transform" def zero_out(self, conditioning): c = [] @@ -283,11 +283,10 @@ class ConditioningSetTimestepRange: RETURN_TYPES = ("CONDITIONING",) FUNCTION = "set_range" - CATEGORY = "advanced/conditioning" + CATEGORY = "model/conditioning/transform" def set_range(self, conditioning, start, end): - c = node_helpers.conditioning_set_values(conditioning, {"start_percent": start, - "end_percent": end}) + c = node_helpers.conditioning_set_values(conditioning, {"start_percent": start, "end_percent": end}) return (c, ) class VAEDecode: @@ -389,7 +388,7 @@ class VAEEncodeForInpaint: RETURN_TYPES = ("LATENT",) FUNCTION = "encode" - CATEGORY = "model/latent/inpaint" + CATEGORY = "model/latent" def encode(self, vae, pixels, mask, grow_mask_by=6): downscale_ratio = vae.spacial_compression_encode() @@ -438,7 +437,7 @@ class InpaintModelConditioning: RETURN_NAMES = ("positive", "negative", "latent") FUNCTION = "encode" - CATEGORY = "model/conditioning/inpaint" + CATEGORY = "model/conditioning" def encode(self, positive, negative, pixels, vae, mask, noise_mask=True): x = (pixels.shape[1] // 8) * 8 @@ -576,7 +575,7 @@ class CheckpointLoader: RETURN_TYPES = ("MODEL", "CLIP", "VAE") FUNCTION = "load_checkpoint" - CATEGORY = "advanced/loaders" + CATEGORY = "model/loaders" DEPRECATED = True def load_checkpoint(self, config_name, ckpt_name): @@ -622,8 +621,9 @@ class DiffusersLoader: return {"required": {"model_path": (paths,), }} RETURN_TYPES = ("MODEL", "CLIP", "VAE") FUNCTION = "load_checkpoint" + DEPRECATED = True - CATEGORY = 
"advanced/loaders/deprecated" + CATEGORY = "model/loaders" def load_checkpoint(self, model_path, output_vae=True, output_clip=True): for search_path in folder_paths.get_folder_paths("diffusers"): @@ -949,7 +949,7 @@ class UNETLoader: RETURN_TYPES = ("MODEL",) FUNCTION = "load_unet" - CATEGORY = "advanced/loaders" + CATEGORY = "model/loaders" def load_unet(self, unet_name, weight_dtype): model_options = {} @@ -977,9 +977,9 @@ class CLIPLoader: RETURN_TYPES = ("CLIP",) FUNCTION = "load_clip" - CATEGORY = "advanced/loaders" + CATEGORY = "model/loaders" - DESCRIPTION = "[Recipes]\n\nstable_diffusion: clip-l\nstable_cascade: clip-g\nsd3: t5 xxl/ clip-g / clip-l\nstable_audio: t5 base\nmochi: t5 xxl\ncogvideox: t5 xxl (226-token padding)\ncosmos: old t5 xxl\nlumina2: gemma 2 2B\nwan: umt5 xxl\n hidream: llama-3.1 (Recommend) or t5\nomnigen2: qwen vl 2.5 3B\nlens: gpt-oss-20b\n pixeldit: gemma 2 2B elm" + DESCRIPTION = "Recipes:\nsd: clip-l\nstable cascade: clip-g\nsd3: t5 xxl / clip-g / clip-l\nstable audio: t5 base\nmochi: t5 xxl\ncogvideox: t5 xxl (226-token padding)\ncosmos: old t5 xxl\nlumina2: gemma 2 2B\nwan: umt5 xxl\nhidream: llama-3.1 (Recommend) or t5\nomnigen2: qwen vl 2.5 3B\nlens: gpt-oss-20b\npixeldit: gemma 2 2B elm" def load_clip(self, clip_name, type="stable_diffusion", device="default"): clip_type = getattr(comfy.sd.CLIPType, type.upper(), comfy.sd.CLIPType.STABLE_DIFFUSION) @@ -1005,9 +1005,9 @@ class DualCLIPLoader: RETURN_TYPES = ("CLIP",) FUNCTION = "load_clip" - CATEGORY = "advanced/loaders" + CATEGORY = "model/loaders" - DESCRIPTION = "[Recipes]\n\nsdxl: clip-l, clip-g\nsd3: clip-l, clip-g / clip-l, t5 / clip-g, t5\nflux: clip-l, t5\nhidream: at least one of t5 or llama, recommended t5 and llama\nhunyuan_image: qwen2.5vl 7b and byt5 small\nnewbie: gemma-3-4b-it, jina clip v2" + DESCRIPTION = "Recipes:\nsdxl: clip-l, clip-g\nsd3: clip-l, clip-g / clip-l, t5 / clip-g, t5\nflux: clip-l, t5\nhidream: at least one of t5 or llama, recommended t5 and 
llama\nhunyuan_image: qwen2.5vl 7b and byt5 small\nnewbie: gemma-3-4b-it, jina clip v2" def load_clip(self, clip_name1, clip_name2, type, device="default"): clip_type = getattr(comfy.sd.CLIPType, type.upper(), comfy.sd.CLIPType.STABLE_DIFFUSION) @@ -1088,7 +1088,7 @@ class StyleModelApply: RETURN_TYPES = ("CONDITIONING",) FUNCTION = "apply_stylemodel" - CATEGORY = "model/conditioning/style_model" + CATEGORY = "model/conditioning" def apply_stylemodel(self, conditioning, style_model, clip_vision_output, strength, strength_type): cond = style_model.get_cond(clip_vision_output).flatten(start_dim=0, end_dim=1).unsqueeze(dim=0) @@ -1518,13 +1518,11 @@ class LatentCrop: class SetLatentNoiseMask: @classmethod def INPUT_TYPES(s): - return {"required": { "samples": ("LATENT",), - "mask": ("MASK",), - }} + return {"required": { "samples": ("LATENT",), "mask": ("MASK",), }} RETURN_TYPES = ("LATENT",) FUNCTION = "set_mask" - CATEGORY = "model/latent/inpaint" + CATEGORY = "model/latent" def set_mask(self, samples, mask): s = samples.copy() @@ -2045,7 +2043,7 @@ NODE_CLASS_MAPPINGS = { "ImageBatch": ImageBatch, "ImagePadForOutpaint": ImagePadForOutpaint, "EmptyImage": EmptyImage, - "ConditioningAverage": ConditioningAverage , + "ConditioningAverage": ConditioningAverage, "ConditioningCombine": ConditioningCombine, "ConditioningConcat": ConditioningConcat, "ConditioningSetArea": ConditioningSetArea, @@ -2101,6 +2099,7 @@ NODE_DISPLAY_NAME_MAPPINGS = { "LoraLoader": "Load LoRA (Model and CLIP)", "LoraLoaderModelOnly": "Load LoRA", "CLIPLoader": "Load CLIP", + "DualCLIPLoader": "Load CLIP (Dual)", "ControlNetLoader": "Load ControlNet Model", "DiffControlNetLoader": "Load ControlNet Model (diff)", "StyleModelLoader": "Load Style Model", @@ -2108,6 +2107,7 @@ NODE_DISPLAY_NAME_MAPPINGS = { "UNETLoader": "Load Diffusion Model", "unCLIPCheckpointLoader": "Load unCLIP Checkpoint", "GLIGENLoader": "Load GLIGEN Model", + "DiffusersLoader": "Load Diffusers Model (DEPRECATED)", # 
Conditioning "CLIPVisionEncode": "CLIP Vision Encode", "StyleModelApply": "Apply Style Model", @@ -2115,12 +2115,16 @@ NODE_DISPLAY_NAME_MAPPINGS = { "CLIPSetLastLayer": "CLIP Set Last Layer", "ConditioningCombine": "Conditioning (Combine)", "ConditioningAverage ": "Conditioning (Average)", + "ConditioningAverage": "Conditioning (Average)", "ConditioningConcat": "Conditioning (Concat)", "ConditioningSetArea": "Conditioning (Set Area)", "ConditioningSetAreaPercentage": "Conditioning (Set Area with Percentage)", + "ConditioningSetAreaStrength": "Conditioning (Set Area Strength)", "ConditioningSetMask": "Conditioning (Set Mask)", "ControlNetApply": "Apply ControlNet (DEPRECATED)", "ControlNetApplyAdvanced": "Apply ControlNet", + "GLIGENTextBoxApply": "Apply GLIGEN Text Box", + "ConditioningZeroOut": "Conditioning Zero Out", # Latent "VAEEncodeForInpaint": "VAE Encode (for Inpainting)", "SetLatentNoiseMask": "Set Latent Noise Mask", @@ -2134,7 +2138,7 @@ NODE_DISPLAY_NAME_MAPPINGS = { "LatentUpscaleBy": "Upscale Latent By", "LatentComposite": "Latent Composite", "LatentBlend": "Latent Blend", - "LatentFromBatch" : "Latent From Batch", + "LatentFromBatch" : "Get Latent From Batch", "RepeatLatentBatch": "Repeat Latent Batch", # Image "EmptyImage": "Empty Image", From c44d261fc20a0a1032fccfe1d6472915a514d577 Mon Sep 17 00:00:00 2001 From: "Daxiong (Lin)" Date: Wed, 17 Jun 2026 08:52:55 +0800 Subject: [PATCH 41/52] Add new model blueprints (#14506) --- .../Character Replacement (SCAIL-2 Base).json | 4191 ++++++++++++++++ ...haracter Replacement (SCAIL-2 Extend).json | 4461 +++++++++++++++++ ...e Depth Estimation (Depth Anything 3).json | 569 +++ blueprints/Image Edit (Bernini-R).json | 3549 +++++++++++++ .../Image to Gaussian Splat (TripoSplat).json | 1983 ++++++++ .../Text to Image (Anima Base 1.0).json | 1088 ++++ blueprints/Text to Image (Anima).json | 7 +- blueprints/Text to Image (Ideogram v4).json | 2473 +++++++++ ...o Depth Estimation (Depth Anything 3).json | 825 
+++ blueprints/Video Edit (Bernini-R).json | 3732 ++++++++++++++ 10 files changed, 22876 insertions(+), 2 deletions(-) create mode 100644 blueprints/Character Replacement (SCAIL-2 Base).json create mode 100644 blueprints/Character Replacement (SCAIL-2 Extend).json create mode 100644 blueprints/Image Depth Estimation (Depth Anything 3).json create mode 100644 blueprints/Image Edit (Bernini-R).json create mode 100644 blueprints/Image to Gaussian Splat (TripoSplat).json create mode 100644 blueprints/Text to Image (Anima Base 1.0).json create mode 100644 blueprints/Text to Image (Ideogram v4).json create mode 100644 blueprints/Video Depth Estimation (Depth Anything 3).json create mode 100644 blueprints/Video Edit (Bernini-R).json diff --git a/blueprints/Character Replacement (SCAIL-2 Base).json b/blueprints/Character Replacement (SCAIL-2 Base).json new file mode 100644 index 000000000..61803df65 --- /dev/null +++ b/blueprints/Character Replacement (SCAIL-2 Base).json @@ -0,0 +1,4191 @@ +{ + "revision": 0, + "last_node_id": 410, + "last_link_id": 0, + "nodes": [ + { + "id": 410, + "type": "35331397-69fb-40ad-b99a-7f17b1a53017", + "pos": [ + 2450, + 5670 + ], + "size": [ + 490, + 1120 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "label": "pose_video", + "localized_name": "video", + "name": "video", + "type": "VIDEO", + "link": null + }, + { + "label": "reference_image", + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": null + }, + { + "label": "prompt", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + }, + { + "label": "segment_index", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + }, + { + "label": "replace_mode", + "name": "value_2", + "type": "BOOLEAN", + "widget": { + "name": "value_2" + }, + "link": null + }, + { + "label": "width", + "name": "value_3", + "type": "INT", + "widget": { + "name": "value_3" + }, + "link": null + }, + { 
+ "label": "height", + "name": "value_4", + "type": "INT", + "widget": { + "name": "value_4" + }, + "link": null + }, + { + "label": "frame_count", + "name": "length", + "type": "INT", + "widget": { + "name": "length" + }, + "link": null + }, + { + "name": "previous_frame_count", + "type": "INT", + "widget": { + "name": "previous_frame_count" + }, + "link": null + }, + { + "name": "pose_strength", + "type": "FLOAT", + "widget": { + "name": "pose_strength" + }, + "link": null + }, + { + "name": "pose_start", + "type": "FLOAT", + "widget": { + "name": "pose_start" + }, + "link": null + }, + { + "name": "pose_end", + "type": "FLOAT", + "widget": { + "name": "pose_end" + }, + "link": null + }, + { + "label": "turbo_mode", + "name": "value_5", + "type": "BOOLEAN", + "widget": { + "name": "value_5" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "label": "distill_lora", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": null + }, + { + "label": "dpo_lora", + "name": "lora_name_1", + "type": "COMBO", + "widget": { + "name": "lora_name_1" + }, + "link": null + }, + { + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + }, + { + "label": "clip_vision", + "name": "clip_name_1", + "type": "COMBO", + "widget": { + "name": "clip_name_1" + }, + "link": null + }, + { + "label": "sam3_video_object", + "name": "text_1", + "type": "STRING", + "widget": { + "name": "text_1" + }, + "link": null + }, + { + "label": "sam3_image_object", + "name": "text_2", + "type": "STRING", + "widget": { + "name": "text_2" + }, + "link": null + }, + { + "label": "sam3_model", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": null + }, + { + "name": "noise_seed", + "type": "INT", + 
"widget": { + "name": "noise_seed" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "405", + "text" + ], + [ + "391", + "value" + ], + [ + "398", + "value" + ], + [ + "387", + "value" + ], + [ + "388", + "value" + ], + [ + "386", + "length" + ], + [ + "406", + "previous_frame_count" + ], + [ + "406", + "pose_strength" + ], + [ + "406", + "pose_start" + ], + [ + "406", + "pose_end" + ], + [ + "402", + "value" + ], + [ + "374", + "unet_name" + ], + [ + "367", + "lora_name" + ], + [ + "408", + "lora_name" + ], + [ + "372", + "clip_name" + ], + [ + "368", + "vae_name" + ], + [ + "373", + "clip_name" + ], + [ + "393", + "text" + ], + [ + "407", + "text" + ], + [ + "394", + "ckpt_name" + ], + [ + "378", + "noise_seed" + ], + [ + "399", + "$$canvas-image-preview" + ], + [ + "400", + "$$canvas-image-preview" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [], + "title": "Character Replacement (SCAIL-2 Base)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "35331397-69fb-40ad-b99a-7f17b1a53017", + "version": 1, + "state": { + "lastGroupId": 17, + "lastNodeId": 410, + "lastLinkId": 570, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Character Replacement (SCAIL-2 Base)", + "inputNode": { + "id": -10, + "bounding": [ + -440, + 4190, + 173.015625, + 508 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 3980, + 3870, + 128, + 68 + ] + }, + "inputs": [ + { + "id": "e9aedfaf-1991-4a75-ad9b-8c3a279b0488", + "name": "video", + "type": "VIDEO", + "linkIds": [ + 409 + ], + "localized_name": "video", + "label": "pose_video", + "pos": [ + -290.984375, + 4214 + ] + }, + { + "id": "0badb26e-9abd-4e9c-b221-aab1237b8773", + "name": "images", + "type": "IMAGE", + "linkIds": [ + 469, + 408, + 473 + ], + "localized_name": "images", + "label": 
"reference_image", + "pos": [ + -290.984375, + 4234 + ] + }, + { + "id": "098f15aa-a066-422e-b491-eaf140bafb2c", + "name": "text", + "type": "STRING", + "linkIds": [ + 499 + ], + "label": "prompt", + "pos": [ + -290.984375, + 4254 + ] + }, + { + "id": "861ea850-8329-491b-9413-2588cdd77bbd", + "name": "value", + "type": "INT", + "linkIds": [ + 502 + ], + "label": "segment_index", + "pos": [ + -290.984375, + 4274 + ] + }, + { + "id": "dcf3ebf8-1263-458a-966b-9e77a5ad704a", + "name": "value_2", + "type": "BOOLEAN", + "linkIds": [ + 504 + ], + "label": "replace_mode", + "pos": [ + -290.984375, + 4294 + ] + }, + { + "id": "dbee10ce-bb98-4733-88a0-d4cd5b13c691", + "name": "value_3", + "type": "INT", + "linkIds": [ + 505 + ], + "label": "width", + "pos": [ + -290.984375, + 4314 + ] + }, + { + "id": "363e3ac7-0096-4480-b875-1d6ff1b8a2d9", + "name": "value_4", + "type": "INT", + "linkIds": [ + 506 + ], + "label": "height", + "pos": [ + -290.984375, + 4334 + ] + }, + { + "id": "4017e1ed-96cd-4c1c-9949-dc1a8c30386a", + "name": "length", + "type": "INT", + "linkIds": [ + 522, + 541 + ], + "label": "frame_count", + "pos": [ + -290.984375, + 4354 + ] + }, + { + "id": "617809fd-9647-4067-8f44-10d8acc8af82", + "name": "previous_frame_count", + "type": "INT", + "linkIds": [ + 542, + 543 + ], + "pos": [ + -290.984375, + 4374 + ] + }, + { + "id": "36ab49f6-528f-4301-a37a-0108aa7f5bec", + "name": "pose_strength", + "type": "FLOAT", + "linkIds": [ + 547 + ], + "pos": [ + -290.984375, + 4394 + ] + }, + { + "id": "24c2e93f-7bbc-4c6d-898e-36b440b8469b", + "name": "pose_start", + "type": "FLOAT", + "linkIds": [ + 548 + ], + "pos": [ + -290.984375, + 4414 + ] + }, + { + "id": "d1dbe632-bb2a-4c30-a7ea-5082f21dc686", + "name": "pose_end", + "type": "FLOAT", + "linkIds": [ + 549 + ], + "pos": [ + -290.984375, + 4434 + ] + }, + { + "id": "ddf07ba4-2837-40ef-925f-5996ea436334", + "name": "value_5", + "type": "BOOLEAN", + "linkIds": [ + 507 + ], + "label": "turbo_mode", + "pos": [ + -290.984375, 
+ 4454 + ] + }, + { + "id": "ee8c796d-b326-40c6-9f9d-65f564053974", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 508 + ], + "pos": [ + -290.984375, + 4474 + ] + }, + { + "id": "6ccae991-51d7-4c8e-adc8-c7f6377d681b", + "name": "lora_name", + "type": "COMBO", + "linkIds": [ + 509 + ], + "label": "distill_lora", + "pos": [ + -290.984375, + 4494 + ] + }, + { + "id": "30772913-ecf9-44f4-a638-2824404b24fb", + "name": "lora_name_1", + "type": "COMBO", + "linkIds": [ + 565 + ], + "label": "dpo_lora", + "pos": [ + -290.984375, + 4514 + ] + }, + { + "id": "2e9c2347-93fe-462f-b5ef-51613fb52c85", + "name": "clip_name", + "type": "COMBO", + "linkIds": [ + 510 + ], + "pos": [ + -290.984375, + 4534 + ] + }, + { + "id": "bc1822c0-7484-47b7-8d59-cf94788290f7", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 511 + ], + "pos": [ + -290.984375, + 4554 + ] + }, + { + "id": "bbf2ee19-58b3-46f4-af6a-cd1557d60c8d", + "name": "clip_name_1", + "type": "COMBO", + "linkIds": [ + 512 + ], + "label": "clip_vision", + "pos": [ + -290.984375, + 4574 + ] + }, + { + "id": "556415bd-8ef3-4cfa-b182-0d7e3e4cd572", + "name": "text_1", + "type": "STRING", + "linkIds": [ + 513 + ], + "label": "sam3_video_object", + "pos": [ + -290.984375, + 4594 + ] + }, + { + "id": "c577930c-85a3-4e92-ac44-8ed639937217", + "name": "text_2", + "type": "STRING", + "linkIds": [ + 514 + ], + "label": "sam3_image_object", + "pos": [ + -290.984375, + 4614 + ] + }, + { + "id": "8f542c5e-da91-4dad-8ff8-4e81a42d5140", + "name": "ckpt_name", + "type": "COMBO", + "linkIds": [ + 515 + ], + "label": "sam3_model", + "pos": [ + -290.984375, + 4634 + ] + }, + { + "id": "3064d232-69a3-43b1-b350-05d8ca0e7c9f", + "name": "noise_seed", + "type": "INT", + "linkIds": [ + 558 + ], + "pos": [ + -290.984375, + 4654 + ] + } + ], + "outputs": [ + { + "id": "7ae88834-7553-45ec-a4e8-bab7d5276b45", + "name": "output", + "type": "IMAGE", + "linkIds": [ + 544 + ], + "localized_name": "output", + "pos": [ + 4004, + 3894 + ] + } 
+ ], + "widgets": [], + "nodes": [ + { + "id": 367, + "type": "LoraLoaderModelOnly", + "pos": [ + 300, + 3590 + ], + "size": [ + 590, + 140 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 563 + }, + { + "localized_name": "lora_name", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": 509 + }, + { + "localized_name": "strength_model", + "name": "strength_model", + "type": "FLOAT", + "widget": { + "name": "strength_model" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 398, + 425 + ] + } + ], + "properties": { + "Node name for S&R": "LoraLoaderModelOnly", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "models": [ + { + "name": "lightx2v_I2V_14B_480p_cfg_step_distill_rank64_bf16.safetensors", + "url": "https://huggingface.co/Kijai/WanVideo_comfy/resolve/main/Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank64_bf16.safetensors", + "directory": "loras" + } + ] + }, + "widgets_values": [ + "lightx2v_I2V_14B_480p_cfg_step_distill_rank64_bf16.safetensors", + 0.8 + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 368, + "type": "VAELoader", + "pos": [ + 300, + 4020 + ], + "size": [ + 590, + 140 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 511 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": [ + 406, + 407 + ] + } + ], + "properties": { + "Node name for S&R": "VAELoader", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "models": [ + { + "name": "Wan2_1_VAE_bf16.safetensors", + "url": "https://huggingface.co/Kijai/WanVideo_comfy/resolve/main/Wan2_1_VAE_bf16.safetensors", + "directory": "vae" + } + ] + }, + "widgets_values": [ + "Wan2_1_VAE_bf16.safetensors" + 
] + }, + { + "id": 369, + "type": "ResizeImageMaskNode", + "pos": [ + 1280, + 4860 + ], + "size": [ + 270, + 160 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "input", + "name": "input", + "type": "IMAGE,MASK", + "link": 491 + }, + { + "localized_name": "resize_type", + "name": "resize_type", + "type": "COMFY_DYNAMICCOMBO_V3", + "widget": { + "name": "resize_type" + }, + "link": null + }, + { + "localized_name": "width", + "name": "resize_type.width", + "type": "INT", + "widget": { + "name": "resize_type.width" + }, + "link": 444 + }, + { + "localized_name": "height", + "name": "resize_type.height", + "type": "INT", + "widget": { + "name": "resize_type.height" + }, + "link": 446 + }, + { + "localized_name": "crop", + "name": "resize_type.crop", + "type": "COMBO", + "widget": { + "name": "resize_type.crop" + }, + "link": null + }, + { + "localized_name": "scale_method", + "name": "scale_method", + "type": "COMBO", + "widget": { + "name": "scale_method" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "resized", + "name": "resized", + "type": "*", + "links": [ + 420, + 492, + 493 + ] + } + ], + "properties": { + "Node name for S&R": "ResizeImageMaskNode", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + "scale dimensions", + 512, + 512, + "center", + "area" + ] + }, + { + "id": 370, + "type": "GetVideoComponents", + "pos": [ + 270, + 4500 + ], + "size": [ + 230, + 90 + ], + "flags": { + "collapsed": true + }, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "video", + "name": "video", + "type": "VIDEO", + "link": 409 + } + ], + "outputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "links": [ + 490 + ] + }, + { + "localized_name": "audio", + "name": "audio", + "type": "AUDIO", + "links": null + }, + { + "localized_name": "fps", + "name": "fps", + "type": "FLOAT", + "links": [] + }, + { + "localized_name": "bit_depth", + "name": 
"bit_depth", + "type": "INT", + "links": null + } + ], + "properties": { + "Node name for S&R": "GetVideoComponents", + "cnr_id": "comfy-core", + "ver": "0.24.0" + } + }, + { + "id": 371, + "type": "GetImageSize", + "pos": [ + 1640, + 4500 + ], + "size": [ + 240, + 190 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 492 + } + ], + "outputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "links": [ + 414 + ] + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "links": [ + 415 + ] + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "links": [ + 416 + ] + } + ], + "properties": { + "Node name for S&R": "GetImageSize", + "cnr_id": "comfy-core", + "ver": "0.24.0" + } + }, + { + "id": 372, + "type": "CLIPLoader", + "pos": [ + 300, + 3790 + ], + "size": [ + 590, + 170 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 510 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 7, + 8 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPLoader", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "models": [ + { + "name": "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "url": "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "directory": "text_encoders" + } + ] + }, + "widgets_values": [ + "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "wan", + "default" + 
] + }, + { + "id": 373, + "type": "CLIPVisionLoader", + "pos": [ + 300, + 4230 + ], + "size": [ + 590, + 110 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 512 + } + ], + "outputs": [ + { + "localized_name": "CLIP_VISION", + "name": "CLIP_VISION", + "type": "CLIP_VISION", + "links": [ + 196 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPVisionLoader", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "models": [ + { + "name": "clip_vision_h.safetensors", + "url": "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/clip_vision/clip_vision_h.safetensors", + "directory": "clip_vision" + } + ] + }, + "widgets_values": [ + "clip_vision_h.safetensors" + ] + }, + { + "id": 374, + "type": "UNETLoader", + "pos": [ + 300, + 3190 + ], + "size": [ + 590, + 140 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 508 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 562 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "models": [ + { + "name": "wan2.1_14B_SCAIL_2_fp16.safetensors", + "url": "https://huggingface.co/Comfy-Org/SCAIL-2/resolve/main/diffusion_models/wan2.1_14B_SCAIL_2_fp16.safetensors", + "directory": "diffusion_models" + } + ] + }, + "widgets_values": [ + "wan2.1_14B_SCAIL_2_fp16.safetensors", + "default" + ] + }, + { + "id": 375, + "type": "ModelSamplingSD3", + "pos": [ + 2760, + 3370 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": 
[ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 424 + }, + { + "localized_name": "shift", + "name": "shift", + "type": "FLOAT", + "widget": { + "name": "shift" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 417 + ] + } + ], + "properties": { + "Node name for S&R": "ModelSamplingSD3", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 5 + ] + }, + { + "id": 376, + "type": "KSamplerSelect", + "pos": [ + 2770, + 3520 + ], + "size": [ + 260, + 110 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SAMPLER", + "name": "SAMPLER", + "type": "SAMPLER", + "links": [ + 419 + ] + } + ], + "properties": { + "Node name for S&R": "KSamplerSelect", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + "euler" + ] + }, + { + "id": 377, + "type": "BasicScheduler", + "pos": [ + 2770, + 3670 + ], + "size": [ + 260, + 170 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 398 + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": 428 + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SIGMAS", + "name": "SIGMAS", + "type": "SIGMAS", + "links": [ + 418 + ] + } + ], + "properties": { + "Node name for S&R": "BasicScheduler", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + "simple", + 6, 
+ 1 + ] + }, + { + "id": 378, + "type": "SamplerCustom", + "pos": [ + 3080, + 3370 + ], + "size": [ + 270, + 670 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 417 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 333 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 334 + }, + { + "localized_name": "sampler", + "name": "sampler", + "type": "SAMPLER", + "link": 419 + }, + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "SIGMAS", + "link": 418 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 335 + }, + { + "localized_name": "add_noise", + "name": "add_noise", + "type": "BOOLEAN", + "widget": { + "name": "add_noise" + }, + "link": null + }, + { + "localized_name": "noise_seed", + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": 558 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": 431 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "LATENT", + "links": [] + }, + { + "localized_name": "denoised_output", + "name": "denoised_output", + "type": "LATENT", + "links": [ + 124 + ] + } + ], + "properties": { + "Node name for S&R": "SamplerCustom", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + true, + 1, + "fixed", + 1 + ] + }, + { + "id": 379, + "type": "PrimitiveInt", + "pos": [ + 2060, + 3750 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 427 + ] + } + ], + "title": "Int (Steps)", + 
"properties": { + "Node name for S&R": "PrimitiveInt", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 6, + "fixed" + ] + }, + { + "id": 380, + "type": "PrimitiveFloat", + "pos": [ + 2060, + 3920 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 430 + ] + } + ], + "title": "Float (CFG)", + "properties": { + "Node name for S&R": "PrimitiveFloat", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 381, + "type": "PrimitiveInt", + "pos": [ + 2060, + 3380 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 426 + ] + } + ], + "title": "Int (Steps)", + "properties": { + "Node name for S&R": "PrimitiveInt", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 40, + "fixed" + ] + }, + { + "id": 382, + "type": "PrimitiveFloat", + "pos": [ + 2060, + 3530 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 429 + ] + } + ], + "title": "Float (CFG)", + "properties": { + "Node name for S&R": "PrimitiveFloat", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 5 + ] + }, + { + "id": 383, + "type": "ComfySwitchNode", + "pos": [ + 2400, + 3360 + ], + "size": [ + 270, + 130 + ], 
+ "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 564 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 425 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 432 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 424 + ] + } + ], + "title": "Switch (Model)", + "properties": { + "Node name for S&R": "ComfySwitchNode", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + false + ] + }, + { + "id": 384, + "type": "ComfySwitchNode", + "pos": [ + 2400, + 3550 + ], + "size": [ + 270, + 130 + ], + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 426 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 427 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 433 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 428 + ] + } + ], + "title": "Switch (Steps)", + "properties": { + "Node name for S&R": "ComfySwitchNode", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + false + ] + }, + { + "id": 385, + "type": "ComfySwitchNode", + "pos": [ + 2400, + 3730 + ], + "size": [ + 270, + 130 + ], + "flags": {}, + "order": 18, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 429 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 430 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 434 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + 
"type": "*", + "links": [ + 431 + ] + } + ], + "title": "Switch (Steps)", + "properties": { + "Node name for S&R": "ComfySwitchNode", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + false + ] + }, + { + "id": 386, + "type": "ImageFromBatch", + "pos": [ + 880, + 4490 + ], + "size": [ + 270, + 140 + ], + "flags": {}, + "order": 19, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 490 + }, + { + "localized_name": "batch_index", + "name": "batch_index", + "type": "INT", + "widget": { + "name": "batch_index" + }, + "link": 450 + }, + { + "localized_name": "length", + "name": "length", + "type": "INT", + "widget": { + "name": "length" + }, + "link": 522 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 491 + ] + } + ], + "properties": { + "Node name for S&R": "ImageFromBatch", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 0, + 81 + ] + }, + { + "id": 387, + "type": "PrimitiveInt", + "pos": [ + 250, + 4820 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 20, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 505 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 441 + ] + } + ], + "title": "Int (Width)", + "properties": { + "Node name for S&R": "PrimitiveInt", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 896, + "fixed" + ] + }, + { + "id": 388, + "type": "PrimitiveInt", + "pos": [ + 250, + 5000 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 21, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 506 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 445 + ] + } + ], + 
"title": "Int (Height)", + "properties": { + "Node name for S&R": "PrimitiveInt", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 512, + "fixed" + ] + }, + { + "id": 389, + "type": "ComfyMathExpression", + "pos": [ + 690, + 4850 + ], + "size": [ + 230, + 80 + ], + "flags": { + "collapsed": true + }, + "order": 22, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT,BOOLEAN", + "link": 441 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT,BOOLEAN", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [] + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 444 + ] + }, + { + "localized_name": "BOOL", + "name": "BOOL", + "type": "BOOLEAN", + "links": null + } + ], + "properties": { + "Node name for S&R": "ComfyMathExpression", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + "(a // 32) * 32" + ] + }, + { + "id": 390, + "type": "ComfyMathExpression", + "pos": [ + 690, + 5030 + ], + "size": [ + 230, + 80 + ], + "flags": { + "collapsed": true + }, + "order": 23, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT,BOOLEAN", + "link": 445 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT,BOOLEAN", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [] + }, + { + "localized_name": "INT", + "name": "INT", + "type": 
"INT", + "links": [ + 446 + ] + }, + { + "localized_name": "BOOL", + "name": "BOOL", + "type": "BOOLEAN", + "links": null + } + ], + "properties": { + "Node name for S&R": "ComfyMathExpression", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + "(a // 32) * 32" + ] + }, + { + "id": 391, + "type": "PrimitiveInt", + "pos": [ + 260, + 4570 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 24, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 502 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 447 + ] + } + ], + "title": "Int (segment index)", + "properties": { + "Node name for S&R": "PrimitiveInt", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 1, + "fixed" + ] + }, + { + "id": 392, + "type": "ComfyMathExpression", + "pos": [ + 580, + 4590 + ], + "size": [ + 230, + 220 + ], + "flags": { + "collapsed": false + }, + "order": 25, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT,BOOLEAN", + "link": 447 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT,BOOLEAN", + "link": 541 + }, + { + "label": "c", + "localized_name": "values.c", + "name": "values.c", + "shape": 7, + "type": "FLOAT,INT,BOOLEAN", + "link": 543 + }, + { + "label": "d", + "localized_name": "values.d", + "name": "values.d", + "shape": 7, + "type": "FLOAT,INT,BOOLEAN", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 450 + ] + }, + { + "localized_name": "BOOL", + 
"name": "BOOL", + "type": "BOOLEAN", + "links": null + } + ], + "properties": { + "Node name for S&R": "ComfyMathExpression", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + "(b - c) * (a - 1)" + ] + }, + { + "id": 393, + "type": "CLIPTextEncode", + "pos": [ + 660, + 5360 + ], + "size": [ + 380, + 160 + ], + "flags": {}, + "order": 26, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 454 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 513 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 461 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode", + "cnr_id": "comfy-core", + "ver": "0.19.3", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ] + }, + { + "id": 394, + "type": "CheckpointLoaderSimple", + "pos": [ + 270, + 5340 + ], + "size": [ + 330, + 160 + ], + "flags": {}, + "order": 27, + "mode": 0, + "inputs": [ + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 515 + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 458, + 463 + ] + }, + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 454, + 489 + ] + }, + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": [] + } + ], + "properties": { + "Node name for S&R": "CheckpointLoaderSimple", + "cnr_id": "comfy-core", + "ver": "0.19.3", + "models": [ + { + "name": "sam3.1_multiplex_fp16.safetensors", + "url": "https://huggingface.co/Comfy-Org/sam3.1/resolve/main/checkpoints/sam3.1_multiplex_fp16.safetensors", + "directory": "checkpoints" + 
} + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "sam3.1_multiplex_fp16.safetensors" + ] + }, + { + "id": 395, + "type": "SAM3_VideoTrack", + "pos": [ + 1190, + 5320 + ], + "size": [ + 280, + 250 + ], + "flags": {}, + "order": 28, + "mode": 0, + "inputs": [ + { + "label": "images", + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": 493 + }, + { + "label": "model", + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 458 + }, + { + "label": "initial_mask", + "localized_name": "initial_mask", + "name": "initial_mask", + "shape": 7, + "type": "MASK", + "link": null + }, + { + "label": "conditioning", + "localized_name": "conditioning", + "name": "conditioning", + "shape": 7, + "type": "CONDITIONING", + "link": 461 + }, + { + "localized_name": "detection_threshold", + "name": "detection_threshold", + "type": "FLOAT", + "widget": { + "name": "detection_threshold" + }, + "link": null + }, + { + "localized_name": "max_objects", + "name": "max_objects", + "type": "INT", + "widget": { + "name": "max_objects" + }, + "link": null + }, + { + "localized_name": "detect_interval", + "name": "detect_interval", + "type": "INT", + "widget": { + "name": "detect_interval" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "track_data", + "name": "track_data", + "type": "SAM3_TRACK_DATA", + "links": [ + 460 + ] + } + ], + "properties": { + "Node name for S&R": "SAM3_VideoTrack", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 0.5, + 4, + 1 + ] + }, + { + "id": 396, + "type": "SCAIL2ColoredMask", + "pos": [ + 1550, + 5470 + ], + "size": [ + 370, + 200 + ], + "flags": {}, + "order": 29, + "mode": 0, + "inputs": [ + { + "localized_name": "driving_track_data", + "name": "driving_track_data", + "type": "SAM3_TRACK_DATA", + "link": 460 + }, + { + 
"localized_name": "ref_track_data", + "name": "ref_track_data", + "shape": 7, + "type": "SAM3_TRACK_DATA", + "link": 464 + }, + { + "localized_name": "object_indices", + "name": "object_indices", + "type": "STRING", + "widget": { + "name": "object_indices" + }, + "link": null + }, + { + "localized_name": "sort_by", + "name": "sort_by", + "type": "COMBO", + "widget": { + "name": "sort_by" + }, + "link": null + }, + { + "localized_name": "replacement_mode", + "name": "replacement_mode", + "type": "BOOLEAN", + "widget": { + "name": "replacement_mode" + }, + "link": 476 + } + ], + "outputs": [ + { + "localized_name": "pose_video_mask", + "name": "pose_video_mask", + "type": "IMAGE", + "links": [ + 466, + 467 + ] + }, + { + "localized_name": "reference_image_mask", + "name": "reference_image_mask", + "type": "IMAGE", + "links": [ + 465, + 472 + ] + } + ], + "properties": { + "Node name for S&R": "SCAIL2ColoredMask", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + "", + "left_to_right", + true + ] + }, + { + "id": 397, + "type": "SAM3_VideoTrack", + "pos": [ + 1190, + 5620 + ], + "size": [ + 280, + 250 + ], + "flags": {}, + "order": 30, + "mode": 0, + "inputs": [ + { + "label": "images", + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": 469 + }, + { + "label": "model", + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 463 + }, + { + "label": "initial_mask", + "localized_name": "initial_mask", + "name": "initial_mask", + "shape": 7, + "type": "MASK", + "link": null + }, + { + "label": "conditioning", + "localized_name": "conditioning", + "name": "conditioning", + "shape": 7, + "type": "CONDITIONING", + "link": 488 + }, + { + "localized_name": "detection_threshold", + "name": "detection_threshold", + "type": "FLOAT", + "widget": { + "name": "detection_threshold" + }, + "link": null + }, + { + "localized_name": "max_objects", + "name": "max_objects", + "type": "INT", + "widget": { + "name": 
"max_objects" + }, + "link": null + }, + { + "localized_name": "detect_interval", + "name": "detect_interval", + "type": "INT", + "widget": { + "name": "detect_interval" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "track_data", + "name": "track_data", + "type": "SAM3_TRACK_DATA", + "links": [ + 464 + ] + } + ], + "properties": { + "Node name for S&R": "SAM3_VideoTrack", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 0.5, + 4, + 1 + ] + }, + { + "id": 398, + "type": "PrimitiveBoolean", + "pos": [ + 1660, + 4200 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 31, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "BOOLEAN", + "widget": { + "name": "value" + }, + "link": 504 + } + ], + "outputs": [ + { + "localized_name": "BOOLEAN", + "name": "BOOLEAN", + "type": "BOOLEAN", + "links": [ + 475, + 476 + ] + } + ], + "title": "Boolean (Replace Mode)", + "properties": { + "Node name for S&R": "PrimitiveBoolean", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + true + ] + }, + { + "id": 399, + "type": "PreviewImage", + "pos": [ + 2040, + 4470 + ], + "size": [ + 350, + 1190 + ], + "flags": {}, + "order": 32, + "mode": 4, + "inputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": 466 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "PreviewImage", + "cnr_id": "comfy-core", + "ver": "0.24.0" + } + }, + { + "id": 400, + "type": "PreviewImage", + "pos": [ + 2440, + 4470 + ], + "size": [ + 230, + 310 + ], + "flags": {}, + "order": 33, + "mode": 4, + "inputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": 465 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "PreviewImage", + "cnr_id": "comfy-core", + "ver": "0.24.0" + } + }, + { + "id": 401, + "type": "VAEDecode", + "pos": [ + 3080, + 4120 + ], + "size": [ + 270, + 100 + ], + "flags": { + 
"collapsed": false + }, + "order": 34, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 124 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 407 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 544 + ] + } + ], + "properties": { + "Node name for S&R": "VAEDecode", + "cnr_id": "comfy-core", + "ver": "0.24.0" + } + }, + { + "id": 402, + "type": "PrimitiveBoolean", + "pos": [ + 2410, + 3940 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 35, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "BOOLEAN", + "widget": { + "name": "value" + }, + "link": 507 + } + ], + "outputs": [ + { + "localized_name": "BOOLEAN", + "name": "BOOLEAN", + "type": "BOOLEAN", + "links": [ + 432, + 433, + 434 + ] + } + ], + "title": "Boolean (Enable Distill LoRA?)", + "properties": { + "Node name for S&R": "PrimitiveBoolean", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + true + ] + }, + { + "id": 403, + "type": "CLIPVisionEncode", + "pos": [ + 1200, + 4260 + ], + "size": [ + 230, + 60 + ], + "flags": { + "collapsed": true + }, + "order": 36, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_vision", + "name": "clip_vision", + "type": "CLIP_VISION", + "link": 196 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 408 + }, + { + "localized_name": "crop", + "name": "crop", + "type": "COMBO", + "widget": { + "name": "crop" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP_VISION_OUTPUT", + "name": "CLIP_VISION_OUTPUT", + "type": "CLIP_VISION_OUTPUT", + "links": [ + 404 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPVisionEncode", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + "none" + ] + }, + { + "id": 404, + "type": "CLIPTextEncode", + "pos": [ + 1000, + 3970 + ], 
+ "size": [ + 520, + 210 + ], + "flags": {}, + "order": 37, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 8 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 326 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + "" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 405, + "type": "CLIPTextEncode", + "pos": [ + 990, + 3170 + ], + "size": [ + 550, + 750 + ], + "flags": {}, + "order": 38, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 7 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 499 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 325 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + "A young woman with dark hair tied in a neat high bun, with a few loose strands framing her face, is dancing outdoors on a sunny coastal hillside. She has a normal-sized head and a slim face, with no hat, no headwear, and no oversized hair volume. She wears a fitted black long-sleeve crop top with a shoulder cutout, extremely baggy black cargo pants with straps and pockets, and chunky black combat boots. She performs energetic dance moves with one leg lifted and arms extended, moving naturally in front of a large tree, a small white stone house with a terracotta roof, and a bright blue sea under a clear sky with light clouds." 
+ ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 406, + "type": "WanSCAILToVideo", + "pos": [ + 1640, + 3400 + ], + "size": [ + 310, + 580 + ], + "flags": {}, + "order": 39, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 325 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 326 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 406 + }, + { + "localized_name": "pose_video", + "name": "pose_video", + "shape": 7, + "type": "IMAGE", + "link": 420 + }, + { + "localized_name": "pose_video_mask", + "name": "pose_video_mask", + "shape": 7, + "type": "IMAGE", + "link": 467 + }, + { + "localized_name": "reference_image", + "name": "reference_image", + "shape": 7, + "type": "IMAGE", + "link": 473 + }, + { + "localized_name": "reference_image_mask", + "name": "reference_image_mask", + "shape": 7, + "type": "IMAGE", + "link": 472 + }, + { + "localized_name": "clip_vision_output", + "name": "clip_vision_output", + "shape": 7, + "type": "CLIP_VISION_OUTPUT", + "link": 404 + }, + { + "localized_name": "previous_frames", + "name": "previous_frames", + "shape": 7, + "type": "IMAGE", + "link": null + }, + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 414 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 415 + }, + { + "localized_name": "length", + "name": "length", + "type": "INT", + "widget": { + "name": "length" + }, + "link": 416 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + }, + { + "localized_name": "pose_strength", + "name": "pose_strength", + "type": "FLOAT", + "widget": { + "name": "pose_strength" + }, + "link": 547 + }, + { + "localized_name": "pose_start", + "name": 
"pose_start", + "type": "FLOAT", + "widget": { + "name": "pose_start" + }, + "link": 548 + }, + { + "localized_name": "pose_end", + "name": "pose_end", + "type": "FLOAT", + "widget": { + "name": "pose_end" + }, + "link": 549 + }, + { + "localized_name": "video_frame_offset", + "name": "video_frame_offset", + "type": "INT", + "widget": { + "name": "video_frame_offset" + }, + "link": null + }, + { + "localized_name": "previous_frame_count", + "name": "previous_frame_count", + "type": "INT", + "widget": { + "name": "previous_frame_count" + }, + "link": 542 + }, + { + "localized_name": "replacement_mode", + "name": "replacement_mode", + "shape": 7, + "type": "BOOLEAN", + "widget": { + "name": "replacement_mode" + }, + "link": 475 + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "links": [ + 333 + ] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "links": [ + 334 + ] + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 335 + ] + }, + { + "localized_name": "video_frame_offset", + "name": "video_frame_offset", + "type": "INT", + "links": [] + } + ], + "properties": { + "Node name for S&R": "WanSCAILToVideo", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 512, + 896, + 65, + 1, + 1, + 0, + 1, + 0, + 5, + true + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 407, + "type": "CLIPTextEncode", + "pos": [ + 670, + 5690 + ], + "size": [ + 380, + 160 + ], + "flags": {}, + "order": 40, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 489 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 514 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 488 + ] + } + ], + "properties": { + "Node name for S&R": 
"CLIPTextEncode", + "cnr_id": "comfy-core", + "ver": "0.19.3", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ] + }, + { + "id": 408, + "type": "LoraLoaderModelOnly", + "pos": [ + 310, + 3390 + ], + "size": [ + 580, + 140 + ], + "flags": {}, + "order": 41, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 562 + }, + { + "localized_name": "lora_name", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": 565 + }, + { + "localized_name": "strength_model", + "name": "strength_model", + "type": "FLOAT", + "widget": { + "name": "strength_model" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 563, + 564 + ] + } + ], + "properties": { + "Node name for S&R": "LoraLoaderModelOnly", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "models": [ + { + "name": "wan2.1_SCAIL_2_DPO_lora_bf16.safetensors", + "url": "https://huggingface.co/Comfy-Org/SCAIL-2/resolve/main/loras/wan2.1_SCAIL_2_DPO_lora_bf16.safetensors", + "directory": "loras" + } + ] + }, + "widgets_values": [ + "wan2.1_SCAIL_2_DPO_lora_bf16.safetensors", + 1 + ] + } + ], + "groups": [ + { + "id": 3, + "title": "Models", + "bounding": [ + 240, + 3100, + 710, + 1270 + ], + "color": "#3f789e", + "flags": {} + }, + { + "id": 4, + "title": "Prompt", + "bounding": [ + 980, + 3100, + 570, + 1270 + ], + "color": "#3f789e", + "flags": {} + }, + { + "id": 5, + "title": "Sampling", + "bounding": [ + 2730, + 3100, + 660, + 1270 + ], + "color": "#3f789e", + "flags": {} + }, + { + "id": 6, + "title": "SCAIL Conditioning", + "bounding": [ + 1580, + 3100, + 420, + 1270 + ], + "color": "#3f789e", + "flags": {} + }, + { + "id": 9, + "title": "Switch Settings", + "bounding": [ + 2030, + 3100, + 670, + 1270 + ], 
+ "color": "#3f789e", + "flags": {} + }, + { + "id": 7, + "title": "Original Settings", + "bounding": [ + 2050, + 3320, + 300, + 340 + ], + "color": "#3f789e", + "flags": {} + }, + { + "id": 8, + "title": "Distill LoRA Settings", + "bounding": [ + 2050, + 3680, + 300, + 370 + ], + "color": "#3f789e", + "flags": {} + }, + { + "id": 10, + "title": "Create Mask", + "bounding": [ + 240, + 5230, + 1760, + 670 + ], + "flags": {} + }, + { + "id": 11, + "title": "Resize Images", + "bounding": [ + 240, + 4750, + 1760, + 450 + ], + "flags": {} + }, + { + "id": 12, + "title": "Trim Frames from Video", + "bounding": [ + 240, + 4410, + 1760, + 310 + ], + "flags": {} + }, + { + "id": 13, + "title": "Preview Masks", + "bounding": [ + 2030, + 4410, + 660, + 1490 + ], + "flags": {} + } + ], + "links": [ + { + "id": 491, + "origin_id": 386, + "origin_slot": 0, + "target_id": 369, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 444, + "origin_id": 389, + "origin_slot": 1, + "target_id": 369, + "target_slot": 2, + "type": "INT" + }, + { + "id": 446, + "origin_id": 390, + "origin_slot": 1, + "target_id": 369, + "target_slot": 3, + "type": "INT" + }, + { + "id": 492, + "origin_id": 369, + "origin_slot": 0, + "target_id": 371, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 424, + "origin_id": 383, + "origin_slot": 0, + "target_id": 375, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 398, + "origin_id": 367, + "origin_slot": 0, + "target_id": 377, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 428, + "origin_id": 384, + "origin_slot": 0, + "target_id": 377, + "target_slot": 2, + "type": "INT" + }, + { + "id": 417, + "origin_id": 375, + "origin_slot": 0, + "target_id": 378, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 333, + "origin_id": 406, + "origin_slot": 0, + "target_id": 378, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 334, + "origin_id": 406, + "origin_slot": 1, + "target_id": 378, + "target_slot": 2, + "type": 
"CONDITIONING" + }, + { + "id": 419, + "origin_id": 376, + "origin_slot": 0, + "target_id": 378, + "target_slot": 3, + "type": "SAMPLER" + }, + { + "id": 418, + "origin_id": 377, + "origin_slot": 0, + "target_id": 378, + "target_slot": 4, + "type": "SIGMAS" + }, + { + "id": 335, + "origin_id": 406, + "origin_slot": 2, + "target_id": 378, + "target_slot": 5, + "type": "LATENT" + }, + { + "id": 431, + "origin_id": 385, + "origin_slot": 0, + "target_id": 378, + "target_slot": 8, + "type": "FLOAT" + }, + { + "id": 425, + "origin_id": 367, + "origin_slot": 0, + "target_id": 383, + "target_slot": 1, + "type": "MODEL" + }, + { + "id": 432, + "origin_id": 402, + "origin_slot": 0, + "target_id": 383, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 426, + "origin_id": 381, + "origin_slot": 0, + "target_id": 384, + "target_slot": 0, + "type": "INT" + }, + { + "id": 427, + "origin_id": 379, + "origin_slot": 0, + "target_id": 384, + "target_slot": 1, + "type": "INT" + }, + { + "id": 433, + "origin_id": 402, + "origin_slot": 0, + "target_id": 384, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 429, + "origin_id": 382, + "origin_slot": 0, + "target_id": 385, + "target_slot": 0, + "type": "FLOAT" + }, + { + "id": 430, + "origin_id": 380, + "origin_slot": 0, + "target_id": 385, + "target_slot": 1, + "type": "FLOAT" + }, + { + "id": 434, + "origin_id": 402, + "origin_slot": 0, + "target_id": 385, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 490, + "origin_id": 370, + "origin_slot": 0, + "target_id": 386, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 450, + "origin_id": 392, + "origin_slot": 1, + "target_id": 386, + "target_slot": 1, + "type": "INT" + }, + { + "id": 441, + "origin_id": 387, + "origin_slot": 0, + "target_id": 389, + "target_slot": 0, + "type": "INT" + }, + { + "id": 445, + "origin_id": 388, + "origin_slot": 0, + "target_id": 390, + "target_slot": 0, + "type": "INT" + }, + { + "id": 447, + "origin_id": 391, + "origin_slot": 0, + 
"target_id": 392, + "target_slot": 0, + "type": "INT" + }, + { + "id": 454, + "origin_id": 394, + "origin_slot": 1, + "target_id": 393, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 493, + "origin_id": 369, + "origin_slot": 0, + "target_id": 395, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 458, + "origin_id": 394, + "origin_slot": 0, + "target_id": 395, + "target_slot": 1, + "type": "MODEL" + }, + { + "id": 461, + "origin_id": 393, + "origin_slot": 0, + "target_id": 395, + "target_slot": 3, + "type": "CONDITIONING" + }, + { + "id": 460, + "origin_id": 395, + "origin_slot": 0, + "target_id": 396, + "target_slot": 0, + "type": "SAM3_TRACK_DATA" + }, + { + "id": 464, + "origin_id": 397, + "origin_slot": 0, + "target_id": 396, + "target_slot": 1, + "type": "SAM3_TRACK_DATA" + }, + { + "id": 476, + "origin_id": 398, + "origin_slot": 0, + "target_id": 396, + "target_slot": 4, + "type": "BOOLEAN" + }, + { + "id": 463, + "origin_id": 394, + "origin_slot": 0, + "target_id": 397, + "target_slot": 1, + "type": "MODEL" + }, + { + "id": 488, + "origin_id": 407, + "origin_slot": 0, + "target_id": 397, + "target_slot": 3, + "type": "CONDITIONING" + }, + { + "id": 466, + "origin_id": 396, + "origin_slot": 0, + "target_id": 399, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 465, + "origin_id": 396, + "origin_slot": 1, + "target_id": 400, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 124, + "origin_id": 378, + "origin_slot": 1, + "target_id": 401, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 407, + "origin_id": 368, + "origin_slot": 0, + "target_id": 401, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 196, + "origin_id": 373, + "origin_slot": 0, + "target_id": 403, + "target_slot": 0, + "type": "CLIP_VISION" + }, + { + "id": 8, + "origin_id": 372, + "origin_slot": 0, + "target_id": 404, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 7, + "origin_id": 372, + "origin_slot": 0, + "target_id": 405, + "target_slot": 0, + 
"type": "CLIP" + }, + { + "id": 325, + "origin_id": 405, + "origin_slot": 0, + "target_id": 406, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 326, + "origin_id": 404, + "origin_slot": 0, + "target_id": 406, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 406, + "origin_id": 368, + "origin_slot": 0, + "target_id": 406, + "target_slot": 2, + "type": "VAE" + }, + { + "id": 420, + "origin_id": 369, + "origin_slot": 0, + "target_id": 406, + "target_slot": 3, + "type": "IMAGE" + }, + { + "id": 467, + "origin_id": 396, + "origin_slot": 0, + "target_id": 406, + "target_slot": 4, + "type": "IMAGE" + }, + { + "id": 472, + "origin_id": 396, + "origin_slot": 1, + "target_id": 406, + "target_slot": 6, + "type": "IMAGE" + }, + { + "id": 404, + "origin_id": 403, + "origin_slot": 0, + "target_id": 406, + "target_slot": 7, + "type": "CLIP_VISION_OUTPUT" + }, + { + "id": 414, + "origin_id": 371, + "origin_slot": 0, + "target_id": 406, + "target_slot": 9, + "type": "INT" + }, + { + "id": 415, + "origin_id": 371, + "origin_slot": 1, + "target_id": 406, + "target_slot": 10, + "type": "INT" + }, + { + "id": 416, + "origin_id": 371, + "origin_slot": 2, + "target_id": 406, + "target_slot": 11, + "type": "INT" + }, + { + "id": 475, + "origin_id": 398, + "origin_slot": 0, + "target_id": 406, + "target_slot": 18, + "type": "BOOLEAN" + }, + { + "id": 489, + "origin_id": 394, + "origin_slot": 1, + "target_id": 407, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 409, + "origin_id": -10, + "origin_slot": 0, + "target_id": 370, + "target_slot": 0, + "type": "VIDEO" + }, + { + "id": 469, + "origin_id": -10, + "origin_slot": 1, + "target_id": 397, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 408, + "origin_id": -10, + "origin_slot": 1, + "target_id": 403, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 473, + "origin_id": -10, + "origin_slot": 1, + "target_id": 406, + "target_slot": 5, + "type": "IMAGE" + }, + { + "id": 499, + "origin_id": -10, 
+ "origin_slot": 2, + "target_id": 405, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 502, + "origin_id": -10, + "origin_slot": 3, + "target_id": 391, + "target_slot": 0, + "type": "INT" + }, + { + "id": 504, + "origin_id": -10, + "origin_slot": 4, + "target_id": 398, + "target_slot": 0, + "type": "BOOLEAN" + }, + { + "id": 505, + "origin_id": -10, + "origin_slot": 5, + "target_id": 387, + "target_slot": 0, + "type": "INT" + }, + { + "id": 506, + "origin_id": -10, + "origin_slot": 6, + "target_id": 388, + "target_slot": 0, + "type": "INT" + }, + { + "id": 507, + "origin_id": -10, + "origin_slot": 12, + "target_id": 402, + "target_slot": 0, + "type": "BOOLEAN" + }, + { + "id": 508, + "origin_id": -10, + "origin_slot": 13, + "target_id": 374, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 509, + "origin_id": -10, + "origin_slot": 14, + "target_id": 367, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 510, + "origin_id": -10, + "origin_slot": 16, + "target_id": 372, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 511, + "origin_id": -10, + "origin_slot": 17, + "target_id": 368, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 512, + "origin_id": -10, + "origin_slot": 18, + "target_id": 373, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 513, + "origin_id": -10, + "origin_slot": 19, + "target_id": 393, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 514, + "origin_id": -10, + "origin_slot": 20, + "target_id": 407, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 515, + "origin_id": -10, + "origin_slot": 21, + "target_id": 394, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 522, + "origin_id": -10, + "origin_slot": 7, + "target_id": 386, + "target_slot": 2, + "type": "INT" + }, + { + "id": 541, + "origin_id": -10, + "origin_slot": 7, + "target_id": 392, + "target_slot": 1, + "type": "FLOAT,INT,BOOLEAN" + }, + { + "id": 542, + "origin_id": -10, + "origin_slot": 8, + "target_id": 406, + "target_slot": 
17, + "type": "INT" + }, + { + "id": 543, + "origin_id": -10, + "origin_slot": 8, + "target_id": 392, + "target_slot": 2, + "type": "FLOAT,INT,BOOLEAN" + }, + { + "id": 544, + "origin_id": 401, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 547, + "origin_id": -10, + "origin_slot": 9, + "target_id": 406, + "target_slot": 13, + "type": "FLOAT" + }, + { + "id": 548, + "origin_id": -10, + "origin_slot": 10, + "target_id": 406, + "target_slot": 14, + "type": "FLOAT" + }, + { + "id": 549, + "origin_id": -10, + "origin_slot": 11, + "target_id": 406, + "target_slot": 15, + "type": "FLOAT" + }, + { + "id": 558, + "origin_id": -10, + "origin_slot": 22, + "target_id": 378, + "target_slot": 7, + "type": "INT" + }, + { + "id": 562, + "origin_id": 374, + "origin_slot": 0, + "target_id": 408, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 563, + "origin_id": 408, + "origin_slot": 0, + "target_id": 367, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 564, + "origin_id": 408, + "origin_slot": 0, + "target_id": 383, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 565, + "origin_id": -10, + "origin_slot": 15, + "target_id": 408, + "target_slot": 1, + "type": "COMBO" + } + ], + "extra": {}, + "category": "Video generation and editing/Video Edit", + "description": "Replaces a character in a video with a reference image using the SCAIL-2 model for end-to-end controlled animation without intermediate pose maps. Key inputs include a source video, a reference character image, and optional text prompts for style or context. Suitable for animated or live-action footage, multi-character scenes, and creative video editing where direct pose-free animation is needed; works best with moderate-length videos." + } + ] + }, + "extra": { + "BlueprintDescription": "Replaces a character in a video with a reference image using the SCAIL-2 model for end-to-end controlled animation without intermediate pose maps. 
Key inputs include a source video, a reference character image, and optional text prompts for style or context. Suitable for animated or live-action footage, multi-character scenes, and creative video editing where direct pose-free animation is needed; works best with moderate-length videos.", + "BlueprintSearchAliases": [ + "character replacement", + "SCAIL-2", + "pose-free animation", + "video editing", + "cross-identity replacement" + ] + } +} \ No newline at end of file diff --git a/blueprints/Character Replacement (SCAIL-2 Extend).json b/blueprints/Character Replacement (SCAIL-2 Extend).json new file mode 100644 index 000000000..bf2f67dfd --- /dev/null +++ b/blueprints/Character Replacement (SCAIL-2 Extend).json @@ -0,0 +1,4461 @@ +{ + "revision": 0, + "last_node_id": 409, + "last_link_id": 0, + "nodes": [ + { + "id": 409, + "type": "2ebfb952-bd3b-43c3-9390-9ea73d41bd1f", + "pos": [ + 3420, + 5580 + ], + "size": [ + 530, + 1140 + ], + "flags": { + "collapsed": false + }, + "order": 4, + "mode": 0, + "inputs": [ + { + "label": "pose_video", + "localized_name": "video", + "name": "video", + "type": "VIDEO", + "link": null + }, + { + "label": "reference_image", + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": null + }, + { + "label": "previous_frames", + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": null + }, + { + "label": "prompt", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + }, + { + "label": "segment_index", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + }, + { + "label": "replace_mode", + "name": "value_2", + "type": "BOOLEAN", + "widget": { + "name": "value_2" + }, + "link": null + }, + { + "label": "width", + "name": "value_3", + "type": "INT", + "widget": { + "name": "value_3" + }, + "link": null + }, + { + "label": "height", + "name": "value_4", + "type": "INT", + "widget": { + "name": "value_4" + }, + 
"link": null + }, + { + "label": "frame_count", + "name": "length", + "type": "INT", + "widget": { + "name": "length" + }, + "link": null + }, + { + "name": "previous_frame_count", + "type": "INT", + "widget": { + "name": "previous_frame_count" + }, + "link": null + }, + { + "name": "pose_strength", + "type": "FLOAT", + "widget": { + "name": "pose_strength" + }, + "link": null + }, + { + "name": "pose_start", + "type": "FLOAT", + "widget": { + "name": "pose_start" + }, + "link": null + }, + { + "name": "pose_end", + "type": "FLOAT", + "widget": { + "name": "pose_end" + }, + "link": null + }, + { + "label": "turbo_mode", + "name": "value_5", + "type": "BOOLEAN", + "widget": { + "name": "value_5" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "label": "distill_lora", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": null + }, + { + "label": "dpo_lora", + "name": "lora_name_1", + "type": "COMBO", + "widget": { + "name": "lora_name_1" + }, + "link": null + }, + { + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + }, + { + "label": "clip_vision", + "name": "clip_name_1", + "type": "COMBO", + "widget": { + "name": "clip_name_1" + }, + "link": null + }, + { + "label": "sam3_video_object", + "name": "text_1", + "type": "STRING", + "widget": { + "name": "text_1" + }, + "link": null + }, + { + "label": "sam3_image_object", + "name": "text_2", + "type": "STRING", + "widget": { + "name": "text_2" + }, + "link": null + }, + { + "label": "sam3_model", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": null + }, + { + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": null + } + ], + "outputs": [ + { + 
"localized_name": "output", + "name": "output", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "363", + "text" + ], + [ + "346", + "value" + ], + [ + "353", + "value" + ], + [ + "342", + "value" + ], + [ + "343", + "value" + ], + [ + "341", + "length" + ], + [ + "364", + "previous_frame_count" + ], + [ + "364", + "pose_strength" + ], + [ + "364", + "pose_start" + ], + [ + "364", + "pose_end" + ], + [ + "360", + "value" + ], + [ + "329", + "unet_name" + ], + [ + "322", + "lora_name" + ], + [ + "366", + "lora_name" + ], + [ + "327", + "clip_name" + ], + [ + "323", + "vae_name" + ], + [ + "328", + "clip_name" + ], + [ + "348", + "text" + ], + [ + "365", + "text" + ], + [ + "349", + "ckpt_name" + ], + [ + "333", + "noise_seed" + ], + [ + "354", + "$$canvas-image-preview" + ], + [ + "355", + "$$canvas-image-preview" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [], + "title": "Character Replacement (SCAIL-2 Extend)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "2ebfb952-bd3b-43c3-9390-9ea73d41bd1f", + "version": 1, + "state": { + "lastGroupId": 17, + "lastNodeId": 410, + "lastLinkId": 570, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Character Replacement (SCAIL-2 Extend)", + "inputNode": { + "id": -10, + "bounding": [ + -300, + 4240, + 173.015625, + 528 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 3570, + 4500, + 128, + 68 + ] + }, + "inputs": [ + { + "id": "e9aedfaf-1991-4a75-ad9b-8c3a279b0488", + "name": "video", + "type": "VIDEO", + "linkIds": [ + 409 + ], + "localized_name": "video", + "label": "pose_video", + "pos": [ + -150.984375, + 4264 + ] + }, + { + "id": "0badb26e-9abd-4e9c-b221-aab1237b8773", + "name": "images", + "type": "IMAGE", + "linkIds": [ + 469, + 408, + 473 + ], + "localized_name": "images", + "label": "reference_image", + "pos": [ + -150.984375, + 4284 + ] + }, + { + "id": 
"9f424e9e-d5a2-4dd9-9934-cc6f6fc8da09", + "name": "image", + "type": "IMAGE", + "linkIds": [ + 486, + 485 + ], + "localized_name": "image", + "label": "previous_frames", + "pos": [ + -150.984375, + 4304 + ] + }, + { + "id": "098f15aa-a066-422e-b491-eaf140bafb2c", + "name": "text", + "type": "STRING", + "linkIds": [ + 499 + ], + "label": "prompt", + "pos": [ + -150.984375, + 4324 + ] + }, + { + "id": "861ea850-8329-491b-9413-2588cdd77bbd", + "name": "value", + "type": "INT", + "linkIds": [ + 502 + ], + "label": "segment_index", + "pos": [ + -150.984375, + 4344 + ] + }, + { + "id": "dcf3ebf8-1263-458a-966b-9e77a5ad704a", + "name": "value_2", + "type": "BOOLEAN", + "linkIds": [ + 504 + ], + "label": "replace_mode", + "pos": [ + -150.984375, + 4364 + ] + }, + { + "id": "dbee10ce-bb98-4733-88a0-d4cd5b13c691", + "name": "value_3", + "type": "INT", + "linkIds": [ + 505 + ], + "label": "width", + "pos": [ + -150.984375, + 4384 + ] + }, + { + "id": "363e3ac7-0096-4480-b875-1d6ff1b8a2d9", + "name": "value_4", + "type": "INT", + "linkIds": [ + 506 + ], + "label": "height", + "pos": [ + -150.984375, + 4404 + ] + }, + { + "id": "dc19eaf1-dd24-4c1e-9b08-6d8d98e0e7f2", + "name": "length", + "type": "INT", + "linkIds": [ + 550, + 557 + ], + "label": "frame_count", + "pos": [ + -150.984375, + 4424 + ] + }, + { + "id": "39d2f098-1740-419e-9c29-e4c4e4a7c3fd", + "name": "previous_frame_count", + "type": "INT", + "linkIds": [ + 551, + 556 + ], + "pos": [ + -150.984375, + 4444 + ] + }, + { + "id": "8d248225-a7f5-42c5-86bd-31eaf695f66e", + "name": "pose_strength", + "type": "FLOAT", + "linkIds": [ + 552 + ], + "pos": [ + -150.984375, + 4464 + ] + }, + { + "id": "04c56360-963a-48ba-944e-4864e2c0349c", + "name": "pose_start", + "type": "FLOAT", + "linkIds": [ + 553 + ], + "pos": [ + -150.984375, + 4484 + ] + }, + { + "id": "1ecd0315-d61f-4986-837c-27fb3f2d0470", + "name": "pose_end", + "type": "FLOAT", + "linkIds": [ + 554 + ], + "pos": [ + -150.984375, + 4504 + ] + }, + { + "id": 
"ddf07ba4-2837-40ef-925f-5996ea436334", + "name": "value_5", + "type": "BOOLEAN", + "linkIds": [ + 507 + ], + "label": "turbo_mode", + "pos": [ + -150.984375, + 4524 + ] + }, + { + "id": "ee8c796d-b326-40c6-9f9d-65f564053974", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 508 + ], + "pos": [ + -150.984375, + 4544 + ] + }, + { + "id": "6ccae991-51d7-4c8e-adc8-c7f6377d681b", + "name": "lora_name", + "type": "COMBO", + "linkIds": [ + 509 + ], + "label": "distill_lora", + "pos": [ + -150.984375, + 4564 + ] + }, + { + "id": "1f1ac950-6ce5-4253-a266-edba58acd135", + "name": "lora_name_1", + "type": "COMBO", + "linkIds": [ + 569 + ], + "label": "dpo_lora", + "pos": [ + -150.984375, + 4584 + ] + }, + { + "id": "2e9c2347-93fe-462f-b5ef-51613fb52c85", + "name": "clip_name", + "type": "COMBO", + "linkIds": [ + 510 + ], + "pos": [ + -150.984375, + 4604 + ] + }, + { + "id": "bc1822c0-7484-47b7-8d59-cf94788290f7", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 511 + ], + "pos": [ + -150.984375, + 4624 + ] + }, + { + "id": "bbf2ee19-58b3-46f4-af6a-cd1557d60c8d", + "name": "clip_name_1", + "type": "COMBO", + "linkIds": [ + 512 + ], + "label": "clip_vision", + "pos": [ + -150.984375, + 4644 + ] + }, + { + "id": "556415bd-8ef3-4cfa-b182-0d7e3e4cd572", + "name": "text_1", + "type": "STRING", + "linkIds": [ + 513 + ], + "label": "sam3_video_object", + "pos": [ + -150.984375, + 4664 + ] + }, + { + "id": "c577930c-85a3-4e92-ac44-8ed639937217", + "name": "text_2", + "type": "STRING", + "linkIds": [ + 514 + ], + "label": "sam3_image_object", + "pos": [ + -150.984375, + 4684 + ] + }, + { + "id": "8f542c5e-da91-4dad-8ff8-4e81a42d5140", + "name": "ckpt_name", + "type": "COMBO", + "linkIds": [ + 515 + ], + "label": "sam3_model", + "pos": [ + -150.984375, + 4704 + ] + }, + { + "id": "9764a497-f066-429c-b77c-bcd5b8c7a68d", + "name": "noise_seed", + "type": "INT", + "linkIds": [ + 559 + ], + "pos": [ + -150.984375, + 4724 + ] + } + ], + "outputs": [ + { + "id": 
"7ae88834-7553-45ec-a4e8-bab7d5276b45", + "name": "output", + "type": "IMAGE", + "linkIds": [ + 546 + ], + "localized_name": "output", + "pos": [ + 3594, + 4524 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 322, + "type": "LoraLoaderModelOnly", + "pos": [ + 300, + 3590 + ], + "size": [ + 590, + 140 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 566 + }, + { + "localized_name": "lora_name", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": 509 + }, + { + "localized_name": "strength_model", + "name": "strength_model", + "type": "FLOAT", + "widget": { + "name": "strength_model" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 398, + 425 + ] + } + ], + "properties": { + "Node name for S&R": "LoraLoaderModelOnly", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "models": [ + { + "name": "lightx2v_I2V_14B_480p_cfg_step_distill_rank64_bf16.safetensors", + "url": "https://huggingface.co/Kijai/WanVideo_comfy/resolve/main/Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank64_bf16.safetensors", + "directory": "loras" + } + ] + }, + "widgets_values": [ + "lightx2v_I2V_14B_480p_cfg_step_distill_rank64_bf16.safetensors", + 0.8 + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 323, + "type": "VAELoader", + "pos": [ + 300, + 4020 + ], + "size": [ + 590, + 140 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 511 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": [ + 406, + 407 + ] + } + ], + "properties": { + "Node name for S&R": "VAELoader", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "models": [ + { + "name": "Wan2_1_VAE_bf16.safetensors", + "url": 
"https://huggingface.co/Kijai/WanVideo_comfy/resolve/main/Wan2_1_VAE_bf16.safetensors", + "directory": "vae" + } + ] + }, + "widgets_values": [ + "Wan2_1_VAE_bf16.safetensors" + ] + }, + { + "id": 324, + "type": "ResizeImageMaskNode", + "pos": [ + 1280, + 4880 + ], + "size": [ + 270, + 160 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "input", + "name": "input", + "type": "IMAGE,MASK", + "link": 491 + }, + { + "localized_name": "resize_type", + "name": "resize_type", + "type": "COMFY_DYNAMICCOMBO_V3", + "widget": { + "name": "resize_type" + }, + "link": null + }, + { + "localized_name": "width", + "name": "resize_type.width", + "type": "INT", + "widget": { + "name": "resize_type.width" + }, + "link": 444 + }, + { + "localized_name": "height", + "name": "resize_type.height", + "type": "INT", + "widget": { + "name": "resize_type.height" + }, + "link": 446 + }, + { + "localized_name": "crop", + "name": "resize_type.crop", + "type": "COMBO", + "widget": { + "name": "resize_type.crop" + }, + "link": null + }, + { + "localized_name": "scale_method", + "name": "scale_method", + "type": "COMBO", + "widget": { + "name": "scale_method" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "resized", + "name": "resized", + "type": "*", + "links": [ + 420, + 492, + 493 + ] + } + ], + "properties": { + "Node name for S&R": "ResizeImageMaskNode", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + "scale dimensions", + 512, + 512, + "center", + "area" + ] + }, + { + "id": 325, + "type": "GetVideoComponents", + "pos": [ + 270, + 4520 + ], + "size": [ + 230, + 90 + ], + "flags": { + "collapsed": true + }, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "video", + "name": "video", + "type": "VIDEO", + "link": 409 + } + ], + "outputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "links": [ + 490 + ] + }, + { + "localized_name": "audio", + "name": "audio", + 
"type": "AUDIO", + "links": null + }, + { + "localized_name": "fps", + "name": "fps", + "type": "FLOAT", + "links": [] + }, + { + "localized_name": "bit_depth", + "name": "bit_depth", + "type": "INT", + "links": null + } + ], + "properties": { + "Node name for S&R": "GetVideoComponents", + "cnr_id": "comfy-core", + "ver": "0.24.0" + } + }, + { + "id": 326, + "type": "GetImageSize", + "pos": [ + 1640, + 4520 + ], + "size": [ + 240, + 190 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 492 + } + ], + "outputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "links": [ + 414 + ] + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "links": [ + 415 + ] + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "links": [ + 416 + ] + } + ], + "properties": { + "Node name for S&R": "GetImageSize", + "cnr_id": "comfy-core", + "ver": "0.24.0" + } + }, + { + "id": 327, + "type": "CLIPLoader", + "pos": [ + 300, + 3790 + ], + "size": [ + 590, + 170 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 510 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 7, + 8 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPLoader", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "models": [ + { + "name": "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "url": 
"https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "directory": "text_encoders" + } + ] + }, + "widgets_values": [ + "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "wan", + "default" + ] + }, + { + "id": 328, + "type": "CLIPVisionLoader", + "pos": [ + 300, + 4230 + ], + "size": [ + 590, + 110 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 512 + } + ], + "outputs": [ + { + "localized_name": "CLIP_VISION", + "name": "CLIP_VISION", + "type": "CLIP_VISION", + "links": [ + 196 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPVisionLoader", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "models": [ + { + "name": "clip_vision_h.safetensors", + "url": "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/clip_vision/clip_vision_h.safetensors", + "directory": "clip_vision" + } + ] + }, + "widgets_values": [ + "clip_vision_h.safetensors" + ] + }, + { + "id": 329, + "type": "UNETLoader", + "pos": [ + 300, + 3190 + ], + "size": [ + 590, + 140 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 508 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 568 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "models": [ + { + "name": "wan2.1_14B_SCAIL_2_fp16.safetensors", + "url": "https://huggingface.co/Comfy-Org/SCAIL-2/resolve/main/diffusion_models/wan2.1_14B_SCAIL_2_fp16.safetensors", + "directory": 
"diffusion_models" + } + ] + }, + "widgets_values": [ + "wan2.1_14B_SCAIL_2_fp16.safetensors", + "default" + ] + }, + { + "id": 330, + "type": "ModelSamplingSD3", + "pos": [ + 2820, + 3430 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 424 + }, + { + "localized_name": "shift", + "name": "shift", + "type": "FLOAT", + "widget": { + "name": "shift" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 417 + ] + } + ], + "properties": { + "Node name for S&R": "ModelSamplingSD3", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 5 + ] + }, + { + "id": 331, + "type": "KSamplerSelect", + "pos": [ + 2830, + 3580 + ], + "size": [ + 260, + 110 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SAMPLER", + "name": "SAMPLER", + "type": "SAMPLER", + "links": [ + 419 + ] + } + ], + "properties": { + "Node name for S&R": "KSamplerSelect", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + "euler" + ] + }, + { + "id": 332, + "type": "BasicScheduler", + "pos": [ + 2830, + 3730 + ], + "size": [ + 260, + 170 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 398 + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": 428 + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": 
null + } + ], + "outputs": [ + { + "localized_name": "SIGMAS", + "name": "SIGMAS", + "type": "SIGMAS", + "links": [ + 418 + ] + } + ], + "properties": { + "Node name for S&R": "BasicScheduler", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + "simple", + 6, + 1 + ] + }, + { + "id": 333, + "type": "SamplerCustom", + "pos": [ + 3140, + 3430 + ], + "size": [ + 270, + 670 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 417 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 333 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 334 + }, + { + "localized_name": "sampler", + "name": "sampler", + "type": "SAMPLER", + "link": 419 + }, + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "SIGMAS", + "link": 418 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 335 + }, + { + "localized_name": "add_noise", + "name": "add_noise", + "type": "BOOLEAN", + "widget": { + "name": "add_noise" + }, + "link": null + }, + { + "localized_name": "noise_seed", + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": 559 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": 431 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "LATENT", + "links": [] + }, + { + "localized_name": "denoised_output", + "name": "denoised_output", + "type": "LATENT", + "links": [ + 124 + ] + } + ], + "properties": { + "Node name for S&R": "SamplerCustom", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + true, + 1, + "fixed", + 1 + ] + }, + { + "id": 334, + "type": "PrimitiveInt", + "pos": [ + 2090, + 3760 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 1, + "mode": 0, + 
"inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 427 + ] + } + ], + "title": "Int (Steps)", + "properties": { + "Node name for S&R": "PrimitiveInt", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 6, + "fixed" + ] + }, + { + "id": 335, + "type": "PrimitiveFloat", + "pos": [ + 2090, + 3930 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 430 + ] + } + ], + "title": "Float (CFG)", + "properties": { + "Node name for S&R": "PrimitiveFloat", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 336, + "type": "PrimitiveInt", + "pos": [ + 2090, + 3390 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 426 + ] + } + ], + "title": "Int (Steps)", + "properties": { + "Node name for S&R": "PrimitiveInt", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 40, + "fixed" + ] + }, + { + "id": 337, + "type": "PrimitiveFloat", + "pos": [ + 2090, + 3540 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 429 + ] + 
} + ], + "title": "Float (CFG)", + "properties": { + "Node name for S&R": "PrimitiveFloat", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 5 + ] + }, + { + "id": 338, + "type": "ComfySwitchNode", + "pos": [ + 2430, + 3370 + ], + "size": [ + 270, + 130 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 567 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 425 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 432 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 424 + ] + } + ], + "title": "Switch (Model)", + "properties": { + "Node name for S&R": "ComfySwitchNode", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + false + ] + }, + { + "id": 339, + "type": "ComfySwitchNode", + "pos": [ + 2430, + 3560 + ], + "size": [ + 270, + 130 + ], + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 426 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 427 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 433 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 428 + ] + } + ], + "title": "Switch (Steps)", + "properties": { + "Node name for S&R": "ComfySwitchNode", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + false + ] + }, + { + "id": 340, + "type": "ComfySwitchNode", + "pos": [ + 2430, + 3740 + ], + "size": [ + 270, + 130 + ], + "flags": {}, + "order": 18, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 429 + }, + { + 
"localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 430 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 434 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 431 + ] + } + ], + "title": "Switch (Steps)", + "properties": { + "Node name for S&R": "ComfySwitchNode", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + false + ] + }, + { + "id": 341, + "type": "ImageFromBatch", + "pos": [ + 880, + 4510 + ], + "size": [ + 270, + 140 + ], + "flags": {}, + "order": 19, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 490 + }, + { + "localized_name": "batch_index", + "name": "batch_index", + "type": "INT", + "widget": { + "name": "batch_index" + }, + "link": 450 + }, + { + "localized_name": "length", + "name": "length", + "type": "INT", + "widget": { + "name": "length" + }, + "link": 550 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 491 + ] + } + ], + "properties": { + "Node name for S&R": "ImageFromBatch", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 0, + 81 + ] + }, + { + "id": 342, + "type": "PrimitiveInt", + "pos": [ + 250, + 4840 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 20, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 505 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 441 + ] + } + ], + "title": "Int (Width)", + "properties": { + "Node name for S&R": "PrimitiveInt", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 896, + "fixed" + ] + }, + { + "id": 343, + "type": "PrimitiveInt", + "pos": [ + 250, + 5020 + ], + "size": [ + 270, + 110 + ], + 
"flags": {}, + "order": 21, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 506 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 445 + ] + } + ], + "title": "Int (Height)", + "properties": { + "Node name for S&R": "PrimitiveInt", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 512, + "fixed" + ] + }, + { + "id": 344, + "type": "ComfyMathExpression", + "pos": [ + 690, + 4870 + ], + "size": [ + 230, + 80 + ], + "flags": { + "collapsed": true + }, + "order": 22, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT,BOOLEAN", + "link": 441 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT,BOOLEAN", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [] + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 444 + ] + }, + { + "localized_name": "BOOL", + "name": "BOOL", + "type": "BOOLEAN", + "links": null + } + ], + "properties": { + "Node name for S&R": "ComfyMathExpression", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + "(a // 32) * 32" + ] + }, + { + "id": 345, + "type": "ComfyMathExpression", + "pos": [ + 690, + 5050 + ], + "size": [ + 230, + 80 + ], + "flags": { + "collapsed": true + }, + "order": 23, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT,BOOLEAN", + "link": 445 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT,BOOLEAN", + "link": null + }, + { + 
"localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [] + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 446 + ] + }, + { + "localized_name": "BOOL", + "name": "BOOL", + "type": "BOOLEAN", + "links": null + } + ], + "properties": { + "Node name for S&R": "ComfyMathExpression", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + "(a // 32) * 32" + ] + }, + { + "id": 346, + "type": "PrimitiveInt", + "pos": [ + 270, + 4580 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 24, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 502 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 447 + ] + } + ], + "title": "Int (segment index)", + "properties": { + "Node name for S&R": "PrimitiveInt", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 2, + "fixed" + ] + }, + { + "id": 347, + "type": "ComfyMathExpression", + "pos": [ + 580, + 4610 + ], + "size": [ + 230, + 120 + ], + "flags": { + "collapsed": true + }, + "order": 25, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT,BOOLEAN", + "link": 447 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT,BOOLEAN", + "link": 557 + }, + { + "label": "c", + "localized_name": "values.c", + "name": "values.c", + "shape": 7, + "type": "FLOAT,INT,BOOLEAN", + "link": 556 + }, + { + "label": "d", + "localized_name": "values.d", + "name": "values.d", + "shape": 7, + "type": "FLOAT,INT,BOOLEAN", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + 
"widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 450 + ] + }, + { + "localized_name": "BOOL", + "name": "BOOL", + "type": "BOOLEAN", + "links": null + } + ], + "properties": { + "Node name for S&R": "ComfyMathExpression", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + "(b - c) * (a - 1)" + ] + }, + { + "id": 348, + "type": "CLIPTextEncode", + "pos": [ + 660, + 5380 + ], + "size": [ + 380, + 160 + ], + "flags": {}, + "order": 26, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 454 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 513 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 461 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode", + "cnr_id": "comfy-core", + "ver": "0.19.3", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ] + }, + { + "id": 349, + "type": "CheckpointLoaderSimple", + "pos": [ + 270, + 5360 + ], + "size": [ + 330, + 160 + ], + "flags": {}, + "order": 27, + "mode": 0, + "inputs": [ + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 515 + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 458, + 463 + ] + }, + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 454, + 489 + ] + }, + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": [] + } + ], + "properties": { + 
"Node name for S&R": "CheckpointLoaderSimple", + "cnr_id": "comfy-core", + "ver": "0.19.3", + "models": [ + { + "name": "sam3.1_multiplex_fp16.safetensors", + "url": "https://huggingface.co/Comfy-Org/sam3.1/resolve/main/checkpoints/sam3.1_multiplex_fp16.safetensors", + "directory": "checkpoints" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "sam3.1_multiplex_fp16.safetensors" + ] + }, + { + "id": 350, + "type": "SAM3_VideoTrack", + "pos": [ + 1190, + 5340 + ], + "size": [ + 280, + 250 + ], + "flags": {}, + "order": 28, + "mode": 0, + "inputs": [ + { + "label": "images", + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": 493 + }, + { + "label": "model", + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 458 + }, + { + "label": "initial_mask", + "localized_name": "initial_mask", + "name": "initial_mask", + "shape": 7, + "type": "MASK", + "link": null + }, + { + "label": "conditioning", + "localized_name": "conditioning", + "name": "conditioning", + "shape": 7, + "type": "CONDITIONING", + "link": 461 + }, + { + "localized_name": "detection_threshold", + "name": "detection_threshold", + "type": "FLOAT", + "widget": { + "name": "detection_threshold" + }, + "link": null + }, + { + "localized_name": "max_objects", + "name": "max_objects", + "type": "INT", + "widget": { + "name": "max_objects" + }, + "link": null + }, + { + "localized_name": "detect_interval", + "name": "detect_interval", + "type": "INT", + "widget": { + "name": "detect_interval" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "track_data", + "name": "track_data", + "type": "SAM3_TRACK_DATA", + "links": [ + 460 + ] + } + ], + "properties": { + "Node name for S&R": "SAM3_VideoTrack", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 0.5, + 4, + 1 + ] + }, + 
{ + "id": 351, + "type": "SCAIL2ColoredMask", + "pos": [ + 1550, + 5490 + ], + "size": [ + 370, + 200 + ], + "flags": {}, + "order": 29, + "mode": 0, + "inputs": [ + { + "localized_name": "driving_track_data", + "name": "driving_track_data", + "type": "SAM3_TRACK_DATA", + "link": 460 + }, + { + "localized_name": "ref_track_data", + "name": "ref_track_data", + "shape": 7, + "type": "SAM3_TRACK_DATA", + "link": 464 + }, + { + "localized_name": "object_indices", + "name": "object_indices", + "type": "STRING", + "widget": { + "name": "object_indices" + }, + "link": null + }, + { + "localized_name": "sort_by", + "name": "sort_by", + "type": "COMBO", + "widget": { + "name": "sort_by" + }, + "link": null + }, + { + "localized_name": "replacement_mode", + "name": "replacement_mode", + "type": "BOOLEAN", + "widget": { + "name": "replacement_mode" + }, + "link": 476 + } + ], + "outputs": [ + { + "localized_name": "pose_video_mask", + "name": "pose_video_mask", + "type": "IMAGE", + "links": [ + 466, + 467 + ] + }, + { + "localized_name": "reference_image_mask", + "name": "reference_image_mask", + "type": "IMAGE", + "links": [ + 465, + 472 + ] + } + ], + "properties": { + "Node name for S&R": "SCAIL2ColoredMask", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + "", + "left_to_right", + true + ] + }, + { + "id": 352, + "type": "SAM3_VideoTrack", + "pos": [ + 1190, + 5640 + ], + "size": [ + 280, + 250 + ], + "flags": {}, + "order": 30, + "mode": 0, + "inputs": [ + { + "label": "images", + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": 469 + }, + { + "label": "model", + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 463 + }, + { + "label": "initial_mask", + "localized_name": "initial_mask", + "name": "initial_mask", + "shape": 7, + "type": "MASK", + "link": null + }, + { + "label": "conditioning", + "localized_name": "conditioning", + "name": "conditioning", + "shape": 7, + "type": "CONDITIONING", + 
"link": 488 + }, + { + "localized_name": "detection_threshold", + "name": "detection_threshold", + "type": "FLOAT", + "widget": { + "name": "detection_threshold" + }, + "link": null + }, + { + "localized_name": "max_objects", + "name": "max_objects", + "type": "INT", + "widget": { + "name": "max_objects" + }, + "link": null + }, + { + "localized_name": "detect_interval", + "name": "detect_interval", + "type": "INT", + "widget": { + "name": "detect_interval" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "track_data", + "name": "track_data", + "type": "SAM3_TRACK_DATA", + "links": [ + 464 + ] + } + ], + "properties": { + "Node name for S&R": "SAM3_VideoTrack", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 0.5, + 4, + 1 + ] + }, + { + "id": 353, + "type": "PrimitiveBoolean", + "pos": [ + 1660, + 4030 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 31, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "BOOLEAN", + "widget": { + "name": "value" + }, + "link": 504 + } + ], + "outputs": [ + { + "localized_name": "BOOLEAN", + "name": "BOOLEAN", + "type": "BOOLEAN", + "links": [ + 475, + 476 + ] + } + ], + "title": "Boolean (Replace Mode)", + "properties": { + "Node name for S&R": "PrimitiveBoolean", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + true + ] + }, + { + "id": 354, + "type": "PreviewImage", + "pos": [ + 2060, + 4500 + ], + "size": [ + 350, + 1190 + ], + "flags": {}, + "order": 32, + "mode": 4, + "inputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": 466 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "PreviewImage", + "cnr_id": "comfy-core", + "ver": "0.24.0" + } + }, + { + "id": 355, + "type": "PreviewImage", + "pos": [ + 2460, + 4500 + ], + "size": [ + 230, + 310 + ], + "flags": {}, + "order": 33, + "mode": 4, + "inputs": [ + { + "localized_name": "images", + "name": "images", 
+ "type": "IMAGE", + "link": 465 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "PreviewImage", + "cnr_id": "comfy-core", + "ver": "0.24.0" + } + }, + { + "id": 356, + "type": "VAEDecode", + "pos": [ + 2920, + 4510 + ], + "size": [ + 270, + 100 + ], + "flags": { + "collapsed": false + }, + "order": 34, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 124 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 407 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 483 + ] + } + ], + "properties": { + "Node name for S&R": "VAEDecode", + "cnr_id": "comfy-core", + "ver": "0.24.0" + } + }, + { + "id": 357, + "type": "ImageFromBatch", + "pos": [ + 2920, + 4680 + ], + "size": [ + 270, + 140 + ], + "flags": {}, + "order": 35, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 483 + }, + { + "localized_name": "batch_index", + "name": "batch_index", + "type": "INT", + "widget": { + "name": "batch_index" + }, + "link": null + }, + { + "localized_name": "length", + "name": "length", + "type": "INT", + "widget": { + "name": "length" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 484 + ] + } + ], + "properties": { + "Node name for S&R": "ImageFromBatch", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 5, + 4096 + ] + }, + { + "id": 358, + "type": "ColorTransfer", + "pos": [ + 2920, + 5050 + ], + "size": [ + 270, + 130 + ], + "flags": {}, + "order": 36, + "mode": 0, + "inputs": [ + { + "localized_name": "image_target", + "name": "image_target", + "type": "IMAGE", + "link": 484 + }, + { + "localized_name": "image_ref", + "name": "image_ref", + "type": "IMAGE", + "link": 482 + }, + { + "localized_name": "method", + "name": "method", + "type": "COMBO", + 
"widget": { + "name": "method" + }, + "link": null + }, + { + "localized_name": "source_stats", + "name": "source_stats", + "type": "COMFY_DYNAMICCOMBO_V3", + "widget": { + "name": "source_stats" + }, + "link": null + }, + { + "localized_name": "strength", + "name": "strength", + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "links": [ + 546 + ] + } + ], + "properties": { + "Node name for S&R": "ColorTransfer", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + "reinhard_lab", + "per_frame", + 1 + ] + }, + { + "id": 359, + "type": "ImageFromBatch", + "pos": [ + 2920, + 4870 + ], + "size": [ + 270, + 140 + ], + "flags": {}, + "order": 37, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 486 + }, + { + "localized_name": "batch_index", + "name": "batch_index", + "type": "INT", + "widget": { + "name": "batch_index" + }, + "link": null + }, + { + "localized_name": "length", + "name": "length", + "type": "INT", + "widget": { + "name": "length" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 482 + ] + } + ], + "properties": { + "Node name for S&R": "ImageFromBatch", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + -1, + 1 + ] + }, + { + "id": 360, + "type": "PrimitiveBoolean", + "pos": [ + 2440, + 3950 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 38, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "BOOLEAN", + "widget": { + "name": "value" + }, + "link": 507 + } + ], + "outputs": [ + { + "localized_name": "BOOLEAN", + "name": "BOOLEAN", + "type": "BOOLEAN", + "links": [ + 432, + 433, + 434 + ] + } + ], + "title": "Boolean (Enable Distill LoRA?)", + "properties": { + "Node name for S&R": "PrimitiveBoolean", + 
"cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + true + ] + }, + { + "id": 361, + "type": "CLIPVisionEncode", + "pos": [ + 1230, + 4310 + ], + "size": [ + 230, + 60 + ], + "flags": { + "collapsed": true + }, + "order": 39, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_vision", + "name": "clip_vision", + "type": "CLIP_VISION", + "link": 196 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 408 + }, + { + "localized_name": "crop", + "name": "crop", + "type": "COMBO", + "widget": { + "name": "crop" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP_VISION_OUTPUT", + "name": "CLIP_VISION_OUTPUT", + "type": "CLIP_VISION_OUTPUT", + "links": [ + 404 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPVisionEncode", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + "none" + ] + }, + { + "id": 362, + "type": "CLIPTextEncode", + "pos": [ + 1030, + 4020 + ], + "size": [ + 520, + 210 + ], + "flags": {}, + "order": 40, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 8 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 326 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + "" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 363, + "type": "CLIPTextEncode", + "pos": [ + 1020, + 3180 + ], + "size": [ + 520, + 720 + ], + "flags": {}, + "order": 41, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 7 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 499 + } + ], + "outputs": [ 
+ { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 325 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + "A young woman with dark hair tied in a neat high bun, with a few loose strands framing her face, is dancing outdoors on a sunny coastal hillside. She has a normal-sized head and a slim face, with no hat, no headwear, and no oversized hair volume. She wears a fitted black long-sleeve crop top with a shoulder cutout, extremely baggy black cargo pants with straps and pockets, and chunky black combat boots. She performs energetic dance moves with one leg lifted and arms extended, moving naturally in front of a large tree, a small white stone house with a terracotta roof, and a bright blue sea under a clear sky with light clouds." + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 364, + "type": "WanSCAILToVideo", + "pos": [ + 1650, + 3310 + ], + "size": [ + 310, + 580 + ], + "flags": {}, + "order": 42, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 325 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 326 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 406 + }, + { + "localized_name": "pose_video", + "name": "pose_video", + "shape": 7, + "type": "IMAGE", + "link": 420 + }, + { + "localized_name": "pose_video_mask", + "name": "pose_video_mask", + "shape": 7, + "type": "IMAGE", + "link": 467 + }, + { + "localized_name": "reference_image", + "name": "reference_image", + "shape": 7, + "type": "IMAGE", + "link": 473 + }, + { + "localized_name": "reference_image_mask", + "name": "reference_image_mask", + "shape": 7, + "type": "IMAGE", + "link": 472 + }, + { + "localized_name": "clip_vision_output", + "name": "clip_vision_output", + "shape": 7, + "type": 
"CLIP_VISION_OUTPUT", + "link": 404 + }, + { + "localized_name": "previous_frames", + "name": "previous_frames", + "shape": 7, + "type": "IMAGE", + "link": 485 + }, + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 414 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 415 + }, + { + "localized_name": "length", + "name": "length", + "type": "INT", + "widget": { + "name": "length" + }, + "link": 416 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + }, + { + "localized_name": "pose_strength", + "name": "pose_strength", + "type": "FLOAT", + "widget": { + "name": "pose_strength" + }, + "link": 552 + }, + { + "localized_name": "pose_start", + "name": "pose_start", + "type": "FLOAT", + "widget": { + "name": "pose_start" + }, + "link": 553 + }, + { + "localized_name": "pose_end", + "name": "pose_end", + "type": "FLOAT", + "widget": { + "name": "pose_end" + }, + "link": 554 + }, + { + "localized_name": "video_frame_offset", + "name": "video_frame_offset", + "type": "INT", + "widget": { + "name": "video_frame_offset" + }, + "link": null + }, + { + "localized_name": "previous_frame_count", + "name": "previous_frame_count", + "type": "INT", + "widget": { + "name": "previous_frame_count" + }, + "link": 551 + }, + { + "localized_name": "replacement_mode", + "name": "replacement_mode", + "shape": 7, + "type": "BOOLEAN", + "widget": { + "name": "replacement_mode" + }, + "link": 475 + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "links": [ + 333 + ] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "links": [ + 334 + ] + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 335 + ] + }, + { + "localized_name": 
"video_frame_offset", + "name": "video_frame_offset", + "type": "INT", + "links": [] + } + ], + "properties": { + "Node name for S&R": "WanSCAILToVideo", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 512, + 896, + 65, + 1, + 1, + 0, + 1, + 0, + 5, + true + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 365, + "type": "CLIPTextEncode", + "pos": [ + 670, + 5710 + ], + "size": [ + 380, + 160 + ], + "flags": {}, + "order": 43, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 489 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 514 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 488 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode", + "cnr_id": "comfy-core", + "ver": "0.19.3", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ] + }, + { + "id": 366, + "type": "LoraLoaderModelOnly", + "pos": [ + 300, + 3390 + ], + "size": [ + 580, + 140 + ], + "flags": {}, + "order": 44, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 568 + }, + { + "localized_name": "lora_name", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": 569 + }, + { + "localized_name": "strength_model", + "name": "strength_model", + "type": "FLOAT", + "widget": { + "name": "strength_model" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 566, + 567 + ] + } + ], + "properties": { + "Node name for S&R": "LoraLoaderModelOnly", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "models": [ + { + "name": 
"wan2.1_SCAIL_2_DPO_lora_bf16.safetensors", + "url": "https://huggingface.co/Comfy-Org/SCAIL-2/resolve/main/loras/wan2.1_SCAIL_2_DPO_lora_bf16.safetensors", + "directory": "loras" + } + ] + }, + "widgets_values": [ + "wan2.1_SCAIL_2_DPO_lora_bf16.safetensors", + 1 + ] + } + ], + "groups": [ + { + "id": 3, + "title": "Models", + "bounding": [ + 240, + 3100, + 720, + 1300 + ], + "color": "#3f789e", + "flags": {} + }, + { + "id": 4, + "title": "Prompt", + "bounding": [ + 990, + 3100, + 580, + 1300 + ], + "color": "#3f789e", + "flags": {} + }, + { + "id": 5, + "title": "Sampling", + "bounding": [ + 2770, + 3100, + 700, + 1300 + ], + "color": "#3f789e", + "flags": {} + }, + { + "id": 6, + "title": "SCAIL Conditioning", + "bounding": [ + 1590, + 3100, + 430, + 1300 + ], + "color": "#3f789e", + "flags": {} + }, + { + "id": 9, + "title": "Switch Settings", + "bounding": [ + 2050, + 3100, + 690, + 1300 + ], + "color": "#3f789e", + "flags": {} + }, + { + "id": 7, + "title": "Original Settings", + "bounding": [ + 2080, + 3330, + 300, + 340 + ], + "color": "#3f789e", + "flags": {} + }, + { + "id": 8, + "title": "Distill LoRA Settings", + "bounding": [ + 2080, + 3690, + 300, + 370 + ], + "color": "#3f789e", + "flags": {} + }, + { + "id": 10, + "title": "Create Mask", + "bounding": [ + 240, + 5250, + 1780, + 670 + ], + "flags": {} + }, + { + "id": 11, + "title": "Resize Images", + "bounding": [ + 240, + 4770, + 1780, + 450 + ], + "flags": {} + }, + { + "id": 12, + "title": "Trim Frames from Video", + "bounding": [ + 240, + 4430, + 1780, + 310 + ], + "flags": {} + }, + { + "id": 13, + "title": "Preview Masks", + "bounding": [ + 2050, + 4430, + 690, + 1490 + ], + "flags": {} + }, + { + "id": 14, + "title": "Group", + "bounding": [ + 2770, + 4430, + 700, + 1490 + ], + "color": "#3f789e", + "flags": {} + } + ], + "links": [ + { + "id": 491, + "origin_id": 341, + "origin_slot": 0, + "target_id": 324, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 444, + "origin_id": 344, + 
"origin_slot": 1, + "target_id": 324, + "target_slot": 2, + "type": "INT" + }, + { + "id": 446, + "origin_id": 345, + "origin_slot": 1, + "target_id": 324, + "target_slot": 3, + "type": "INT" + }, + { + "id": 492, + "origin_id": 324, + "origin_slot": 0, + "target_id": 326, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 424, + "origin_id": 338, + "origin_slot": 0, + "target_id": 330, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 398, + "origin_id": 322, + "origin_slot": 0, + "target_id": 332, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 428, + "origin_id": 339, + "origin_slot": 0, + "target_id": 332, + "target_slot": 2, + "type": "INT" + }, + { + "id": 417, + "origin_id": 330, + "origin_slot": 0, + "target_id": 333, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 333, + "origin_id": 364, + "origin_slot": 0, + "target_id": 333, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 334, + "origin_id": 364, + "origin_slot": 1, + "target_id": 333, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 419, + "origin_id": 331, + "origin_slot": 0, + "target_id": 333, + "target_slot": 3, + "type": "SAMPLER" + }, + { + "id": 418, + "origin_id": 332, + "origin_slot": 0, + "target_id": 333, + "target_slot": 4, + "type": "SIGMAS" + }, + { + "id": 335, + "origin_id": 364, + "origin_slot": 2, + "target_id": 333, + "target_slot": 5, + "type": "LATENT" + }, + { + "id": 431, + "origin_id": 340, + "origin_slot": 0, + "target_id": 333, + "target_slot": 8, + "type": "FLOAT" + }, + { + "id": 425, + "origin_id": 322, + "origin_slot": 0, + "target_id": 338, + "target_slot": 1, + "type": "MODEL" + }, + { + "id": 432, + "origin_id": 360, + "origin_slot": 0, + "target_id": 338, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 426, + "origin_id": 336, + "origin_slot": 0, + "target_id": 339, + "target_slot": 0, + "type": "INT" + }, + { + "id": 427, + "origin_id": 334, + "origin_slot": 0, + "target_id": 339, + "target_slot": 1, + "type": 
"INT" + }, + { + "id": 433, + "origin_id": 360, + "origin_slot": 0, + "target_id": 339, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 429, + "origin_id": 337, + "origin_slot": 0, + "target_id": 340, + "target_slot": 0, + "type": "FLOAT" + }, + { + "id": 430, + "origin_id": 335, + "origin_slot": 0, + "target_id": 340, + "target_slot": 1, + "type": "FLOAT" + }, + { + "id": 434, + "origin_id": 360, + "origin_slot": 0, + "target_id": 340, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 490, + "origin_id": 325, + "origin_slot": 0, + "target_id": 341, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 450, + "origin_id": 347, + "origin_slot": 1, + "target_id": 341, + "target_slot": 1, + "type": "INT" + }, + { + "id": 441, + "origin_id": 342, + "origin_slot": 0, + "target_id": 344, + "target_slot": 0, + "type": "INT" + }, + { + "id": 445, + "origin_id": 343, + "origin_slot": 0, + "target_id": 345, + "target_slot": 0, + "type": "INT" + }, + { + "id": 447, + "origin_id": 346, + "origin_slot": 0, + "target_id": 347, + "target_slot": 0, + "type": "INT" + }, + { + "id": 454, + "origin_id": 349, + "origin_slot": 1, + "target_id": 348, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 493, + "origin_id": 324, + "origin_slot": 0, + "target_id": 350, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 458, + "origin_id": 349, + "origin_slot": 0, + "target_id": 350, + "target_slot": 1, + "type": "MODEL" + }, + { + "id": 461, + "origin_id": 348, + "origin_slot": 0, + "target_id": 350, + "target_slot": 3, + "type": "CONDITIONING" + }, + { + "id": 460, + "origin_id": 350, + "origin_slot": 0, + "target_id": 351, + "target_slot": 0, + "type": "SAM3_TRACK_DATA" + }, + { + "id": 464, + "origin_id": 352, + "origin_slot": 0, + "target_id": 351, + "target_slot": 1, + "type": "SAM3_TRACK_DATA" + }, + { + "id": 476, + "origin_id": 353, + "origin_slot": 0, + "target_id": 351, + "target_slot": 4, + "type": "BOOLEAN" + }, + { + "id": 463, + "origin_id": 349, + 
"origin_slot": 0, + "target_id": 352, + "target_slot": 1, + "type": "MODEL" + }, + { + "id": 488, + "origin_id": 365, + "origin_slot": 0, + "target_id": 352, + "target_slot": 3, + "type": "CONDITIONING" + }, + { + "id": 466, + "origin_id": 351, + "origin_slot": 0, + "target_id": 354, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 465, + "origin_id": 351, + "origin_slot": 1, + "target_id": 355, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 124, + "origin_id": 333, + "origin_slot": 1, + "target_id": 356, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 407, + "origin_id": 323, + "origin_slot": 0, + "target_id": 356, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 483, + "origin_id": 356, + "origin_slot": 0, + "target_id": 357, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 484, + "origin_id": 357, + "origin_slot": 0, + "target_id": 358, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 482, + "origin_id": 359, + "origin_slot": 0, + "target_id": 358, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 196, + "origin_id": 328, + "origin_slot": 0, + "target_id": 361, + "target_slot": 0, + "type": "CLIP_VISION" + }, + { + "id": 8, + "origin_id": 327, + "origin_slot": 0, + "target_id": 362, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 7, + "origin_id": 327, + "origin_slot": 0, + "target_id": 363, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 325, + "origin_id": 363, + "origin_slot": 0, + "target_id": 364, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 326, + "origin_id": 362, + "origin_slot": 0, + "target_id": 364, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 406, + "origin_id": 323, + "origin_slot": 0, + "target_id": 364, + "target_slot": 2, + "type": "VAE" + }, + { + "id": 420, + "origin_id": 324, + "origin_slot": 0, + "target_id": 364, + "target_slot": 3, + "type": "IMAGE" + }, + { + "id": 467, + "origin_id": 351, + "origin_slot": 0, + "target_id": 364, + "target_slot": 4, + 
"type": "IMAGE" + }, + { + "id": 472, + "origin_id": 351, + "origin_slot": 1, + "target_id": 364, + "target_slot": 6, + "type": "IMAGE" + }, + { + "id": 404, + "origin_id": 361, + "origin_slot": 0, + "target_id": 364, + "target_slot": 7, + "type": "CLIP_VISION_OUTPUT" + }, + { + "id": 414, + "origin_id": 326, + "origin_slot": 0, + "target_id": 364, + "target_slot": 9, + "type": "INT" + }, + { + "id": 415, + "origin_id": 326, + "origin_slot": 1, + "target_id": 364, + "target_slot": 10, + "type": "INT" + }, + { + "id": 416, + "origin_id": 326, + "origin_slot": 2, + "target_id": 364, + "target_slot": 11, + "type": "INT" + }, + { + "id": 475, + "origin_id": 353, + "origin_slot": 0, + "target_id": 364, + "target_slot": 18, + "type": "BOOLEAN" + }, + { + "id": 489, + "origin_id": 349, + "origin_slot": 1, + "target_id": 365, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 409, + "origin_id": -10, + "origin_slot": 0, + "target_id": 325, + "target_slot": 0, + "type": "VIDEO" + }, + { + "id": 469, + "origin_id": -10, + "origin_slot": 1, + "target_id": 352, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 408, + "origin_id": -10, + "origin_slot": 1, + "target_id": 361, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 473, + "origin_id": -10, + "origin_slot": 1, + "target_id": 364, + "target_slot": 5, + "type": "IMAGE" + }, + { + "id": 486, + "origin_id": -10, + "origin_slot": 2, + "target_id": 359, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 485, + "origin_id": -10, + "origin_slot": 2, + "target_id": 364, + "target_slot": 8, + "type": "IMAGE" + }, + { + "id": 499, + "origin_id": -10, + "origin_slot": 3, + "target_id": 363, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 502, + "origin_id": -10, + "origin_slot": 4, + "target_id": 346, + "target_slot": 0, + "type": "INT" + }, + { + "id": 504, + "origin_id": -10, + "origin_slot": 5, + "target_id": 353, + "target_slot": 0, + "type": "BOOLEAN" + }, + { + "id": 505, + "origin_id": -10, + 
"origin_slot": 6, + "target_id": 342, + "target_slot": 0, + "type": "INT" + }, + { + "id": 506, + "origin_id": -10, + "origin_slot": 7, + "target_id": 343, + "target_slot": 0, + "type": "INT" + }, + { + "id": 507, + "origin_id": -10, + "origin_slot": 13, + "target_id": 360, + "target_slot": 0, + "type": "BOOLEAN" + }, + { + "id": 508, + "origin_id": -10, + "origin_slot": 14, + "target_id": 329, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 509, + "origin_id": -10, + "origin_slot": 15, + "target_id": 322, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 510, + "origin_id": -10, + "origin_slot": 17, + "target_id": 327, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 511, + "origin_id": -10, + "origin_slot": 18, + "target_id": 323, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 512, + "origin_id": -10, + "origin_slot": 19, + "target_id": 328, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 513, + "origin_id": -10, + "origin_slot": 20, + "target_id": 348, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 514, + "origin_id": -10, + "origin_slot": 21, + "target_id": 365, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 515, + "origin_id": -10, + "origin_slot": 22, + "target_id": 349, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 546, + "origin_id": 358, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 550, + "origin_id": -10, + "origin_slot": 8, + "target_id": 341, + "target_slot": 2, + "type": "INT" + }, + { + "id": 551, + "origin_id": -10, + "origin_slot": 9, + "target_id": 364, + "target_slot": 17, + "type": "INT" + }, + { + "id": 552, + "origin_id": -10, + "origin_slot": 10, + "target_id": 364, + "target_slot": 13, + "type": "FLOAT" + }, + { + "id": 553, + "origin_id": -10, + "origin_slot": 11, + "target_id": 364, + "target_slot": 14, + "type": "FLOAT" + }, + { + "id": 554, + "origin_id": -10, + "origin_slot": 12, + "target_id": 364, + "target_slot": 15, + "type": 
"FLOAT" + }, + { + "id": 556, + "origin_id": -10, + "origin_slot": 9, + "target_id": 347, + "target_slot": 2, + "type": "FLOAT,INT,BOOLEAN" + }, + { + "id": 557, + "origin_id": -10, + "origin_slot": 8, + "target_id": 347, + "target_slot": 1, + "type": "FLOAT,INT,BOOLEAN" + }, + { + "id": 559, + "origin_id": -10, + "origin_slot": 23, + "target_id": 333, + "target_slot": 7, + "type": "INT" + }, + { + "id": 566, + "origin_id": 366, + "origin_slot": 0, + "target_id": 322, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 567, + "origin_id": 366, + "origin_slot": 0, + "target_id": 338, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 568, + "origin_id": 329, + "origin_slot": 0, + "target_id": 366, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 569, + "origin_id": -10, + "origin_slot": 16, + "target_id": 366, + "target_slot": 1, + "type": "COMBO" + } + ], + "extra": {}, + "category": "Video generation and editing/Video Edit", + "description": "Replaces a character in a video with a reference image using the SCAIL-2 model for end-to-end controlled animation without intermediate pose maps. Key inputs include a source video, a reference character image, and optional text prompts for style or context. Suitable for animated or live-action footage, multi-character scenes, and creative video editing where direct pose-free animation is needed; works best with moderate-length videos." + } + ] + }, + "extra": { + "BlueprintDescription": "Replaces a character in a video with a reference image using the SCAIL-2 model for end-to-end controlled animation without intermediate pose maps. Key inputs include a source video, a reference character image, and optional text prompts for style or context. 
Suitable for animated or live-action footage, multi-character scenes, and creative video editing where direct pose-free animation is needed; works best with moderate-length videos.", + "BlueprintSearchAliases": [ + "character replacement", + "SCAIL-2 extend", + "video character swap" + ] + } +} \ No newline at end of file diff --git a/blueprints/Image Depth Estimation (Depth Anything 3).json b/blueprints/Image Depth Estimation (Depth Anything 3).json new file mode 100644 index 000000000..3c5f24845 --- /dev/null +++ b/blueprints/Image Depth Estimation (Depth Anything 3).json @@ -0,0 +1,569 @@ +{ + "revision": 0, + "last_node_id": 89, + "last_link_id": 0, + "nodes": [ + { + "id": 89, + "type": "85e595bd-af9e-40ee-85c5-b98bb15da47a", + "pos": [ + 320, + 520 + ], + "size": [ + 400, + 360 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": null + }, + { + "name": "resolution", + "type": "INT", + "widget": { + "name": "resolution" + }, + "link": null + }, + { + "name": "resize_method", + "type": "COMBO", + "widget": { + "name": "resize_method" + }, + "link": null + }, + { + "label": "output_type", + "name": "output", + "type": "COMFY_DYNAMICCOMBO_V3", + "widget": { + "name": "output" + }, + "link": null + }, + { + "label": "output_normalization", + "name": "output.normalization", + "type": "COMBO", + "widget": { + "name": "output.normalization" + }, + "link": null + }, + { + "label": "apply_sky_clip", + "name": "output.apply_sky_clip", + "type": "BOOLEAN", + "widget": { + "name": "output.apply_sky_clip" + }, + "link": null + }, + { + "name": "model_name", + "type": "COMBO", + "widget": { + "name": "model_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "87", + "resolution" + ], + [ + "87", + "resize_method" + ], + [ + "86", + "output" + ], + [ + 
"86", + "output.normalization" + ], + [ + "86", + "output.apply_sky_clip" + ], + [ + "88", + "model_name" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [], + "title": "Image Depth Estimation (Depth Anything 3)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "85e595bd-af9e-40ee-85c5-b98bb15da47a", + "version": 1, + "state": { + "lastGroupId": 4, + "lastNodeId": 89, + "lastLinkId": 109, + "lastRerouteId": 0 + }, + "revision": 2, + "config": {}, + "name": "Image Depth Estimation (Depth Anything 3)", + "inputNode": { + "id": -10, + "bounding": [ + 400, + 90, + 166.998046875, + 188 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1250, + 146, + 128, + 68 + ] + }, + "inputs": [ + { + "id": "43cf3118-495a-487d-8eb3-a17c7e92f64f", + "name": "image", + "type": "IMAGE", + "linkIds": [ + 19 + ], + "localized_name": "image", + "pos": [ + 542.998046875, + 114 + ] + }, + { + "id": "1089a0a1-6db1-45a8-84b0-0bfdc2ed920a", + "name": "resolution", + "type": "INT", + "linkIds": [ + 22 + ], + "pos": [ + 542.998046875, + 134 + ] + }, + { + "id": "25fb64ac-26d5-466d-995b-6d51b9afa2c4", + "name": "resize_method", + "type": "COMBO", + "linkIds": [ + 23 + ], + "pos": [ + 542.998046875, + 154 + ] + }, + { + "id": "8acafb7c-6c8b-46b3-9d74-c563498a3af1", + "name": "output", + "type": "COMFY_DYNAMICCOMBO_V3", + "linkIds": [ + 24 + ], + "label": "output_type", + "pos": [ + 542.998046875, + 174 + ] + }, + { + "id": "1da5009b-4648-43e8-a257-16426630cf22", + "name": "output.normalization", + "type": "COMBO", + "linkIds": [ + 25 + ], + "label": "output_normalization", + "pos": [ + 542.998046875, + 194 + ] + }, + { + "id": "fd7edb33-5fb1-4538-a411-26e5039a9321", + "name": "output.apply_sky_clip", + "type": "BOOLEAN", + "linkIds": [ + 26 + ], + "label": "apply_sky_clip", + "pos": [ + 542.998046875, + 214 + ] + }, + { + "id": "b5be4c8a-b833-4f1e-8c94-3ed1dd722190", + "name": "model_name", + "type": "COMBO", + 
"linkIds": [ + 106 + ], + "pos": [ + 542.998046875, + 234 + ] + } + ], + "outputs": [ + { + "id": "478ab537-63bc-4d74-a9f0-c975f550880f", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 7 + ], + "localized_name": "IMAGE", + "pos": [ + 1274, + 170 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 86, + "type": "DA3Render", + "pos": [ + 800, + 310 + ], + "size": [ + 380, + 130 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "da3_geometry", + "name": "da3_geometry", + "type": "DA3_GEOMETRY", + "link": 12 + }, + { + "localized_name": "output", + "name": "output", + "type": "COMFY_DYNAMICCOMBO_V3", + "widget": { + "name": "output" + }, + "link": 24 + }, + { + "localized_name": "output.normalization", + "name": "output.normalization", + "type": "COMBO", + "widget": { + "name": "output.normalization" + }, + "link": 25 + }, + { + "localized_name": "output.apply_sky_clip", + "name": "output.apply_sky_clip", + "type": "BOOLEAN", + "widget": { + "name": "output.apply_sky_clip" + }, + "link": 26 + }, + { + "name": "geometry", + "type": "DA3_GEOMETRY", + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 7 + ] + } + ], + "properties": { + "Node name for S&R": "DA3Render", + "cnr_id": "comfy-core", + "ver": "0.19.0" + }, + "widgets_values": [ + "depth", + "v2_style", + false + ] + }, + { + "id": 87, + "type": "DA3Inference", + "pos": [ + 800, + 50 + ], + "size": [ + 390, + 130 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "da3_model", + "name": "da3_model", + "type": "DA3_MODEL", + "link": 107 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 19 + }, + { + "localized_name": "resolution", + "name": "resolution", + "type": "INT", + "widget": { + "name": "resolution" + }, + "link": 22 + }, + { + "localized_name": "resize_method", + "name": "resize_method", + "type": 
"COMBO", + "widget": { + "name": "resize_method" + }, + "link": 23 + }, + { + "localized_name": "mode", + "name": "mode", + "type": "COMFY_DYNAMICCOMBO_V3", + "widget": { + "name": "mode" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "da3_geometry", + "name": "da3_geometry", + "type": "DA3_GEOMETRY", + "slot_index": 0, + "links": [ + 12 + ] + } + ], + "properties": { + "Node name for S&R": "DA3Inference", + "cnr_id": "comfy-core", + "ver": "0.19.0" + }, + "widgets_values": [ + 504, + "upper_bound_resize", + "mono" + ] + }, + { + "id": 88, + "type": "LoadDA3Model", + "pos": [ + 810, + -160 + ], + "size": [ + 400, + 140 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "model_name", + "name": "model_name", + "type": "COMBO", + "widget": { + "name": "model_name" + }, + "link": 106 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "DA3_MODEL", + "name": "DA3_MODEL", + "type": "DA3_MODEL", + "links": [ + 107 + ] + } + ], + "properties": { + "Node name for S&R": "LoadDA3Model", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "models": [ + { + "name": "depth_anything_3_mono_large.safetensors", + "url": "https://huggingface.co/Comfy-Org/Depth-Anything-3/resolve/main/geometry_estimation/depth_anything_3_mono_large.safetensors", + "directory": "geometry_estimation" + } + ] + }, + "widgets_values": [ + "depth_anything_3_mono_large.safetensors", + "default" + ] + } + ], + "groups": [], + "links": [ + { + "id": 12, + "origin_id": 87, + "origin_slot": 0, + "target_id": 86, + "target_slot": 0, + "type": "DA3_GEOMETRY" + }, + { + "id": 19, + "origin_id": -10, + "origin_slot": 0, + "target_id": 87, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 7, + "origin_id": 86, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 22, + 
"origin_id": -10, + "origin_slot": 1, + "target_id": 87, + "target_slot": 2, + "type": "INT" + }, + { + "id": 23, + "origin_id": -10, + "origin_slot": 2, + "target_id": 87, + "target_slot": 3, + "type": "COMBO" + }, + { + "id": 24, + "origin_id": -10, + "origin_slot": 3, + "target_id": 86, + "target_slot": 1, + "type": "COMFY_DYNAMICCOMBO_V3" + }, + { + "id": 25, + "origin_id": -10, + "origin_slot": 4, + "target_id": 86, + "target_slot": 2, + "type": "COMBO" + }, + { + "id": 26, + "origin_id": -10, + "origin_slot": 5, + "target_id": 86, + "target_slot": 3, + "type": "BOOLEAN" + }, + { + "id": 106, + "origin_id": -10, + "origin_slot": 6, + "target_id": 88, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 107, + "origin_id": 88, + "origin_slot": 0, + "target_id": 87, + "target_slot": 0, + "type": "DA3_MODEL" + } + ], + "extra": {}, + "category": "Conditioning & Preprocessors/Depth", + "description": "This subgraph takes an input image and produces a depth map using the Depth Anything 3 model, which recovers spatially consistent geometry from any number of views. It is ideal for single or multi-view images, videos, and 3D scenes where accurate depth estimation is needed for tasks like SLAM, novel view synthesis, or spatial perception. The model uses a plain transformer backbone and supports both monocular and multi-view inputs." + } + ] + }, + "extra": { + "BlueprintDescription": "This subgraph takes an input image and produces a depth map using the Depth Anything 3 model, which recovers spatially consistent geometry from any number of views. It is ideal for single or multi-view images, videos, and 3D scenes where accurate depth estimation is needed for tasks like SLAM, novel view synthesis, or spatial perception. The model uses a plain transformer backbone and supports both monocular and multi-view inputs."
+ } +} \ No newline at end of file diff --git a/blueprints/Image Edit (Bernini-R).json b/blueprints/Image Edit (Bernini-R).json new file mode 100644 index 000000000..9ad7867a4 --- /dev/null +++ b/blueprints/Image Edit (Bernini-R).json @@ -0,0 +1,3549 @@ +{ + "revision": 0, + "last_node_id": 76, + "last_link_id": 0, + "nodes": [ + { + "id": 76, + "type": "64670aa5-a932-4e9f-a299-3bfef6ebc043", + "pos": [ + 3240, + 4340 + ], + "size": [ + 510, + 850 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "label": "source_image", + "name": "source_video", + "shape": 7, + "type": "IMAGE", + "link": null + }, + { + "name": "reference_video", + "shape": 7, + "type": "IMAGE", + "link": null + }, + { + "label": "reference_image", + "name": "reference_images.reference_image_0", + "shape": 7, + "type": "IMAGE", + "link": null + }, + { + "label": "prompt", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + }, + { + "label": "task_type", + "name": "choice", + "type": "COMBO", + "widget": { + "name": "choice" + }, + "link": null + }, + { + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": null + }, + { + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": null + }, + { + "name": "ref_max_size", + "shape": 7, + "type": "INT", + "widget": { + "name": "ref_max_size" + }, + "link": null + }, + { + "label": "enable_turbo_mode", + "name": "value", + "type": "BOOLEAN", + "widget": { + "name": "value" + }, + "link": null + }, + { + "label": "high_noise_model", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "label": "low_noise_model", + "name": "unet_name_1", + "type": "COMBO", + "widget": { + "name": "unet_name_1" + }, + "link": null + }, + { + "label": "lightning_lora", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": null + }, + { + "name": "clip_name", + "type": 
"COMBO", + "widget": { + "name": "clip_name" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + }, + { + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": null + }, + { + "name": "length", + "type": "INT", + "widget": { + "name": "length" + }, + "link": null + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "120", + "value" + ], + [ + "54", + "choice" + ], + [ + "50", + "width" + ], + [ + "50", + "height" + ], + [ + "50", + "ref_max_size" + ], + [ + "70", + "value" + ], + [ + "5", + "unet_name" + ], + [ + "12", + "unet_name" + ], + [ + "11", + "lora_name" + ], + [ + "9", + "clip_name" + ], + [ + "7", + "vae_name" + ], + [ + "19", + "noise_seed" + ], + [ + "50", + "length" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [], + "title": "Image Edit (Bernini-R)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "64670aa5-a932-4e9f-a299-3bfef6ebc043", + "version": 1, + "state": { + "lastGroupId": 9, + "lastNodeId": 157, + "lastLinkId": 308, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Image Edit (Bernini-R)", + "inputNode": { + "id": -10, + "bounding": [ + -60, + 4230, + 159.744140625, + 368 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 4720, + 4220, + 128, + 68 + ] + }, + "inputs": [ + { + "id": "3df44016-bb49-40a2-a1dc-47d750440516", + "name": "source_video", + "type": "IMAGE", + "linkIds": [ + 294 + ], + "label": "source_image", + "pos": [ + 75.744140625, + 4254 + ] + }, + { + "id": "9e960570-35a6-4f48-bfa3-5f6ded6ede10", + "name": "reference_video", + "type": "IMAGE", + "linkIds": [ + 162 + ], + "pos": [ + 75.744140625, + 4274 + ] + }, + { + "id": "4ddf8f1f-b92a-4802-96cd-1379ce4dcba3", + "name": "reference_images.reference_image_0", + "type": "IMAGE", + "linkIds": 
[ + 163 + ], + "label": "reference_image", + "pos": [ + 75.744140625, + 4294 + ] + }, + { + "id": "fb6d520b-6281-4b05-8a36-64ed7ef58850", + "name": "text", + "type": "STRING", + "linkIds": [ + 308 + ], + "label": "prompt", + "pos": [ + 75.744140625, + 4314 + ] + }, + { + "id": "8005758c-ac19-4baf-8895-4661b2bf9327", + "name": "choice", + "type": "COMBO", + "linkIds": [ + 165 + ], + "label": "task_type", + "pos": [ + 75.744140625, + 4334 + ] + }, + { + "id": "8f7316d5-b98d-4702-83db-6f57fd8804c6", + "name": "width", + "type": "INT", + "linkIds": [ + 166 + ], + "pos": [ + 75.744140625, + 4354 + ] + }, + { + "id": "2dc22817-de2d-4d0f-8763-71795668c1d3", + "name": "height", + "type": "INT", + "linkIds": [ + 167 + ], + "pos": [ + 75.744140625, + 4374 + ] + }, + { + "id": "ff81d4ba-46f0-40e3-995d-50df973d951c", + "name": "ref_max_size", + "type": "INT", + "linkIds": [ + 169 + ], + "pos": [ + 75.744140625, + 4394 + ] + }, + { + "id": "2c12a5f5-e9d9-45e7-bb62-bbf7a352731f", + "name": "value", + "type": "BOOLEAN", + "linkIds": [ + 170 + ], + "label": "enable_turbo_mode", + "pos": [ + 75.744140625, + 4414 + ] + }, + { + "id": "ff09c771-83a5-4d92-bc16-006e6eb19406", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 171 + ], + "label": "high_noise_model", + "pos": [ + 75.744140625, + 4434 + ] + }, + { + "id": "67aed2f7-f407-4f55-add0-6ae669a069dc", + "name": "unet_name_1", + "type": "COMBO", + "linkIds": [ + 173 + ], + "label": "low_noise_model", + "pos": [ + 75.744140625, + 4454 + ] + }, + { + "id": "7f456845-cdba-46f0-829c-31b72d4a9038", + "name": "lora_name", + "type": "COMBO", + "linkIds": [ + 174, + 175 + ], + "label": "lightning_lora", + "pos": [ + 75.744140625, + 4474 + ] + }, + { + "id": "b7332485-fd38-4522-8192-ea862afe9a35", + "name": "clip_name", + "type": "COMBO", + "linkIds": [ + 176 + ], + "pos": [ + 75.744140625, + 4494 + ] + }, + { + "id": "3328f3bc-c4ce-467e-830e-0e394c8da2b7", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 177 + ], + 
"pos": [ + 75.744140625, + 4514 + ] + }, + { + "id": "c8a71e1e-2d0b-4f9a-afd8-4186ba4cc241", + "name": "noise_seed", + "type": "INT", + "linkIds": [ + 283 + ], + "pos": [ + 75.744140625, + 4534 + ] + }, + { + "id": "3663f935-0959-4cb3-923d-d44736198056", + "name": "length", + "type": "INT", + "linkIds": [ + 295 + ], + "pos": [ + 75.744140625, + 4554 + ] + } + ], + "outputs": [ + { + "id": "5621984a-f548-41d5-b812-481a9122dd81", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 297 + ], + "pos": [ + 4744, + 4244 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 9, + "type": "CLIPLoader", + "pos": [ + 890, + 5030 + ], + "size": [ + 670, + 170 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 176 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 7, + 8 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPLoader", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "models": [ + { + "name": "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "url": "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors?download=true", + "directory": "text_encoders" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "wan", + "default" + ] + }, + { + "id": 7, + "type": "VAELoader", + "pos": [ + 890, + 5260 + ], + "size": [ + 670, + 110 + ], + 
"flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 177 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": [ + 99, + 119 + ] + } + ], + "properties": { + "Node name for S&R": "VAELoader", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "models": [ + { + "name": "Wan2_1_VAE_bf16.safetensors", + "url": "https://huggingface.co/Kijai/WanVideo_comfy/resolve/main/Wan2_1_VAE_bf16.safetensors?download=true", + "directory": "vae" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "Wan2_1_VAE_bf16.safetensors" + ] + }, + { + "id": 29, + "type": "LoraLoaderModelOnly", + "pos": [ + 890, + 4810 + ], + "size": [ + 670, + 170 + ], + "flags": {}, + "order": 20, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 128 + }, + { + "localized_name": "lora_name", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": 175 + }, + { + "localized_name": "strength_model", + "name": "strength_model", + "type": "FLOAT", + "widget": { + "name": "strength_model" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 132 + ] + } + ], + "properties": { + "Node name for S&R": "LoraLoaderModelOnly", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "models": [ + { + "name": "lightx2v_T2V_14B_cfg_step_distill_v2_lora_rank64_bf16.safetensors", + "url": "https://huggingface.co/Kijai/WanVideo_comfy/resolve/main/Lightx2v/lightx2v_T2V_14B_cfg_step_distill_v2_lora_rank64_bf16.safetensors?download=true", + "directory": "loras" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, 
+ "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "lightx2v_T2V_14B_cfg_step_distill_v2_lora_rank64_bf16.safetensors", + 1.5 + ], + "color": "#332922", + "bgcolor": "#593930" + }, + { + "id": 4, + "type": "CLIPTextEncode", + "pos": [ + 1670, + 4670 + ], + "size": [ + 700, + 240 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 8 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 118 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 17, + "type": "SplitSigmas", + "pos": [ + 3240, + 4450 + ], + "size": [ + 270, + 130 + ], + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "SIGMAS", + "link": 33 + }, + { + "localized_name": "step", + "name": "step", + "type": "INT", + "widget": { + "name": "step" + }, + "link": 146 + } + ], + "outputs": [ + { + "localized_name": "high_sigmas", + "name": "high_sigmas", + "type": "SIGMAS", + "links": [ + 41 + ] + }, + { + "localized_name": "low_sigmas", + "name": "low_sigmas", + "type": "SIGMAS", + "links": [ + 42 + ] + } + ], + "properties": { + "Node name for S&R": "SplitSigmas", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "enableTabs": false, + "tabWidth": 65, 
+ "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 3 + ] + }, + { + "id": 27, + "type": "KSamplerSelect", + "pos": [ + 3240, + 4640 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SAMPLER", + "name": "SAMPLER", + "type": "SAMPLER", + "links": [ + 53, + 56 + ] + } + ], + "properties": { + "Node name for S&R": "KSamplerSelect", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "res_multistep" + ] + }, + { + "id": 5, + "type": "UNETLoader", + "pos": [ + 890, + 4200 + ], + "size": [ + 670, + 140 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 171 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 127, + 130 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "models": [ + { + "name": "wan2.2_bernini_r_high_noise_fp8_scaled.safetensors", + "url": "https://huggingface.co/Comfy-Org/Bernini-R/resolve/main/diffusion_models/wan2.2_bernini_r_high_noise_fp8_scaled.safetensors", + "directory": "diffusion_models" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + 
"secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "wan2.2_bernini_r_high_noise_fp8_scaled.safetensors", + "default" + ] + }, + { + "id": 11, + "type": "LoraLoaderModelOnly", + "pos": [ + 890, + 4390 + ], + "size": [ + 670, + 170 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 127 + }, + { + "localized_name": "lora_name", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": 174 + }, + { + "localized_name": "strength_model", + "name": "strength_model", + "type": "FLOAT", + "widget": { + "name": "strength_model" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 129 + ] + } + ], + "properties": { + "Node name for S&R": "LoraLoaderModelOnly", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "models": [ + { + "name": "lightx2v_T2V_14B_cfg_step_distill_v2_lora_rank64_bf16.safetensors", + "url": "https://huggingface.co/Kijai/WanVideo_comfy/resolve/main/Lightx2v/lightx2v_T2V_14B_cfg_step_distill_v2_lora_rank64_bf16.safetensors?download=true", + "directory": "loras" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "lightx2v_T2V_14B_cfg_step_distill_v2_lora_rank64_bf16.safetensors", + 3 + ], + "color": "#332922", + "bgcolor": "#593930" + }, + { + "id": 12, + "type": "UNETLoader", + "pos": [ + 890, + 4620 + ], + "size": [ + 670, + 140 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 173 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + 
"outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 34, + 128, + 131 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "models": [ + { + "name": "wan2.2_bernini_r_low_noise_fp8_scaled.safetensors", + "url": "https://huggingface.co/Comfy-Org/Bernini-R/resolve/main/diffusion_models/wan2.2_bernini_r_low_noise_fp8_scaled.safetensors", + "directory": "diffusion_models" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "wan2.2_bernini_r_low_noise_fp8_scaled.safetensors", + "default" + ] + }, + { + "id": 16, + "type": "VAEDecode", + "pos": [ + 4410, + 4220 + ], + "size": [ + 250, + 100 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 31 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 99 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 297 + ] + } + ], + "properties": { + "Node name for S&R": "VAEDecode", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 19, + "type": "SamplerCustom", + "pos": [ + 3580, + 4220 + ], + "size": [ + 280, + 680 + ], + "flags": {}, + "order": 19, + "mode": 0, + "showAdvanced": false, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 133 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 121 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 123 + }, + { + "localized_name": 
"sampler", + "name": "sampler", + "type": "SAMPLER", + "link": 53 + }, + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "SIGMAS", + "link": 41 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 125 + }, + { + "localized_name": "add_noise", + "name": "add_noise", + "type": "BOOLEAN", + "widget": { + "name": "add_noise" + }, + "link": null + }, + { + "localized_name": "noise_seed", + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": 283 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": 153 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "LATENT", + "links": [ + 40 + ] + }, + { + "localized_name": "denoised_output", + "name": "denoised_output", + "type": "LATENT", + "links": [] + } + ], + "properties": { + "Node name for S&R": "SamplerCustom", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + true, + 283365432432581, + "randomize", + 1 + ] + }, + { + "id": 18, + "type": "BasicScheduler", + "pos": [ + 3240, + 4230 + ], + "size": [ + 270, + 170 + ], + "flags": {}, + "order": 18, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 34 + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": 143 + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SIGMAS", + "name": "SIGMAS", + "type": "SIGMAS", 
+ "links": [ + 33 + ] + } + ], + "properties": { + "Node name for S&R": "BasicScheduler", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "simple", + 6, + 1 + ] + }, + { + "id": 15, + "type": "SamplerCustom", + "pos": [ + 3910, + 4220 + ], + "size": [ + 280, + 680 + ], + "flags": {}, + "order": 15, + "mode": 0, + "showAdvanced": false, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 134 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 122 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 124 + }, + { + "localized_name": "sampler", + "name": "sampler", + "type": "SAMPLER", + "link": 56 + }, + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "SIGMAS", + "link": 42 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 40 + }, + { + "localized_name": "add_noise", + "name": "add_noise", + "type": "BOOLEAN", + "widget": { + "name": "add_noise" + }, + "link": null + }, + { + "localized_name": "noise_seed", + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": null + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": 154 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "LATENT", + "links": [ + 31 + ] + }, + { + "localized_name": "denoised_output", + "name": "denoised_output", + "type": "LATENT", + "links": null + } + ], + "properties": { + "Node name for S&R": "SamplerCustom", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + 
"secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + false, + 0, + "fixed", + 1 + ] + }, + { + "id": 50, + "type": "BerniniConditioning", + "pos": [ + 1880, + 4990 + ], + "size": [ + 310, + 380 + ], + "flags": {}, + "order": 21, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 117 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 118 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 119 + }, + { + "localized_name": "source_video", + "name": "source_video", + "shape": 7, + "type": "IMAGE", + "link": 294 + }, + { + "localized_name": "reference_video", + "name": "reference_video", + "shape": 7, + "type": "IMAGE", + "link": 162 + }, + { + "label": "reference_image_0", + "localized_name": "reference_images.reference_image_0", + "name": "reference_images.reference_image_0", + "shape": 7, + "type": "IMAGE", + "link": 163 + }, + { + "label": "reference_image_1", + "localized_name": "reference_images.reference_image_1", + "name": "reference_images.reference_image_1", + "shape": 7, + "type": "IMAGE", + "link": null + }, + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 166 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 167 + }, + { + "localized_name": "length", + "name": "length", + "type": "INT", + "widget": { + "name": "length" + }, + "link": 295 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + }, + { + "localized_name": "ref_max_size", + "name": "ref_max_size", + "shape": 7, + "type": "INT", + "widget": { + "name": "ref_max_size" + }, + "link": 169 + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "links": [ + 
121, + 122 + ] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "links": [ + 123, + 124 + ] + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 125 + ] + } + ], + "properties": { + "Node name for S&R": "BerniniConditioning", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 928, + 1280, + 1, + 1, + 848 + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 3, + "type": "CLIPTextEncode", + "pos": [ + 1670, + 4210 + ], + "size": [ + 710, + 390 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 7 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 140 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 117 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 53, + "type": "ComfySwitchNode", + "pos": [ + 2860, + 4520 + ], + "size": [ + 270, + 130 + ], + "flags": {}, + "order": 23, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 131 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 132 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 148 + } + ], + "outputs": [ + { + 
"localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 134 + ] + } + ], + "title": "Switch (Low Noise)", + "properties": { + "Node name for S&R": "ComfySwitchNode", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + false + ] + }, + { + "id": 54, + "type": "CustomCombo", + "pos": [ + 890, + 3070 + ], + "size": [ + 230, + 350 + ], + "flags": {}, + "order": 24, + "mode": 0, + "inputs": [ + { + "localized_name": "choice", + "name": "choice", + "type": "COMBO", + "widget": { + "name": "choice" + }, + "link": 165 + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": null + }, + { + "localized_name": "INDEX", + "name": "INDEX", + "type": "INT", + "links": [ + 135 + ] + } + ], + "properties": { + "Node name for S&R": "CustomCombo", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + "Default", + 0, + "Default", + "Text to Image", + "Text to Video", + "Image Editing", + "Subject to Image", + "" + ] + }, + { + "id": 57, + "type": "c39e0ea5-b767-460c-b394-b09703772fa6", + "pos": [ + 1400, + 3070 + ], + "size": [ + 390, + 440 + ], + "flags": {}, + "order": 25, + "mode": 0, + "inputs": [ + { + "localized_name": "text_per_line", + "name": "text_per_line", + "type": "STRING", + "widget": { + "name": "text_per_line" + }, + "link": null + }, + { + "localized_name": "index", + "name": "index", + "type": "INT", + "widget": { + "name": "index" + }, + "link": 135 + } + ], + "outputs": [ + { + "localized_name": "selected_line", + "name": "selected_line", + "type": "STRING", + "links": [ + 137 + ] + } + ], + "properties": { + "proxyWidgets": [ + [ + "2", + "string" + ], + [ + "56", + "value" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.19.0", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + } + }, + "widgets_values": [] + }, + { + "id": 59, + "type": "StringConcatenate", + "pos": [ + 1410, + 3770 + ], + "size": [ + 400, + 250 + ], 
+ "flags": {}, + "order": 26, + "mode": 0, + "inputs": [ + { + "localized_name": "string_a", + "name": "string_a", + "type": "STRING", + "widget": { + "name": "string_a" + }, + "link": 137 + }, + { + "localized_name": "string_b", + "name": "string_b", + "type": "STRING", + "widget": { + "name": "string_b" + }, + "link": 307 + }, + { + "localized_name": "delimiter", + "name": "delimiter", + "type": "STRING", + "widget": { + "name": "delimiter" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 140 + ] + } + ], + "properties": { + "Node name for S&R": "StringConcatenate", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + "", + "", + "" + ] + }, + { + "id": 62, + "type": "PrimitiveInt", + "pos": [ + 2460, + 4240 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 141 + ] + } + ], + "title": "Int (Steps)", + "properties": { + "Node name for S&R": "PrimitiveInt", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 40, + "fixed" + ] + }, + { + "id": 63, + "type": "PrimitiveInt", + "pos": [ + 2470, + 4780 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 142 + ] + } + ], + "title": "Int (Steps)", + "properties": { + "Node name for S&R": "PrimitiveInt", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 6, + "fixed" + ] + }, + { + "id": 64, + "type": "ComfySwitchNode", + "pos": [ + 2860, + 4710 + ], + 
"size": [ + 270, + 130 + ], + "flags": {}, + "order": 27, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 141 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 142 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 149 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 143 + ] + } + ], + "title": "Switch (Steps)", + "properties": { + "Node name for S&R": "ComfySwitchNode", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + false + ] + }, + { + "id": 66, + "type": "PrimitiveInt", + "pos": [ + 2460, + 4400 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 144 + ] + } + ], + "title": "Int (Split Steps)", + "properties": { + "Node name for S&R": "PrimitiveInt", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 20, + "fixed" + ] + }, + { + "id": 67, + "type": "PrimitiveInt", + "pos": [ + 2470, + 4950 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 145 + ] + } + ], + "title": "Int (Split Steps)", + "properties": { + "Node name for S&R": "PrimitiveInt", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 3, + "fixed" + ] + }, + { + "id": 68, + "type": "ComfySwitchNode", + "pos": [ + 2860, + 4910 + ], + "size": [ + 270, + 130 + ], + "flags": {}, + "order": 
28, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 144 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 145 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 150 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 146 + ] + } + ], + "title": "Switch (Low Steps)", + "properties": { + "Node name for S&R": "ComfySwitchNode", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + false + ] + }, + { + "id": 72, + "type": "ComfySwitchNode", + "pos": [ + 2860, + 5100 + ], + "size": [ + 270, + 130 + ], + "flags": {}, + "order": 30, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 151 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 152 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 155 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 153, + 154 + ] + } + ], + "title": "Switch (CFG)", + "properties": { + "Node name for S&R": "ComfySwitchNode", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + false + ] + }, + { + "id": 73, + "type": "PrimitiveFloat", + "pos": [ + 2470, + 5110 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 152 + ] + } + ], + "title": "Float (CFG)", + "properties": { + "Node name for S&R": "PrimitiveFloat", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 1 + ] 
+ }, + { + "id": 71, + "type": "PrimitiveFloat", + "pos": [ + 2460, + 4560 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 151 + ] + } + ], + "title": "Float (CFG)", + "properties": { + "Node name for S&R": "PrimitiveFloat", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 5 + ] + }, + { + "id": 70, + "type": "PrimitiveBoolean", + "pos": [ + 2470, + 5290 + ], + "size": [ + 270, + 100 + ], + "flags": {}, + "order": 29, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "BOOLEAN", + "widget": { + "name": "value" + }, + "link": 170 + } + ], + "outputs": [ + { + "localized_name": "BOOLEAN", + "name": "BOOLEAN", + "type": "BOOLEAN", + "links": [ + 147, + 148, + 149, + 150, + 155 + ] + } + ], + "title": "Boolean (Enable Turbo LoRA?)", + "properties": { + "Node name for S&R": "PrimitiveBoolean", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + true + ] + }, + { + "id": 52, + "type": "ComfySwitchNode", + "pos": [ + 2860, + 4330 + ], + "size": [ + 270, + 130 + ], + "flags": {}, + "order": 22, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 130 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 129 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 147 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 133 + ] + } + ], + "title": "Switch (High Noise)", + "properties": { + "Node name for S&R": "ComfySwitchNode", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + false + ] 
+ }, + { + "id": 75, + "type": "MarkdownNote", + "pos": [ + 2450, + 3950 + ], + "size": [ + 340, + 150 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "You can find the original settings here: https://github.com/bytedance/Bernini/blob/main/gradio_demo.py" + ], + "color": "#222", + "bgcolor": "#000" + }, + { + "id": 120, + "type": "PrimitiveStringMultiline", + "pos": [ + 890, + 3750 + ], + "size": [ + 470, + 290 + ], + "flags": {}, + "order": 31, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "STRING", + "widget": { + "name": "value" + }, + "link": 308 + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 307 + ] + } + ], + "title": "Text Multiline (User Prompt)", + "properties": { + "Node name for S&R": "PrimitiveStringMultiline", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + "" + ] + } + ], + "groups": [ + { + "id": 1, + "title": "Prompt & Conditioning", + "bounding": [ + 1650, + 4130, + 760, + 1270 + ], + "color": "#3f789e", + "flags": {} + }, + { + "id": 2, + "title": "Models", + "bounding": [ + 870, + 4130, + 750, + 1270 + ], + "color": "#3f789e", + "flags": {} + }, + { + "id": 3, + "title": "Sampling", + "bounding": [ + 3210, + 4130, + 1160, + 1270 + ], + "color": "#3f789e", + "flags": {} + }, + { + "id": 8, + "title": "Prompt Construction", + "bounding": [ + 870, + 2960, + 960, + 1120 + ], + "color": "#3f789e", + "flags": {} + }, + { + "id": 4, + "title": "System Prompt (Based on task type)", + "bounding": [ + 880, + 3000, + 920, + 680 + ], + "color": "#3f789e", + "flags": {} + }, + { + "id": 7, + "title": "Switch Settings", + "bounding": [ + 2440, + 4130, + 740, + 1270 + ], + "color": "#3f789e", + "flags": {} + }, + { + "id": 5, + "title": "Distill LoRA settings", + "bounding": [ + 2450, + 4710, + 310, + 530 + ], + "color": "#3f789e", + "flags": {} + }, + { 
+ "id": 6, + "title": "Original Settings", + "bounding": [ + 2450, + 4170, + 300, + 504 + ], + "color": "#3f789e", + "flags": {} + } + ], + "links": [ + { + "id": 128, + "origin_id": 12, + "origin_slot": 0, + "target_id": 29, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 8, + "origin_id": 9, + "origin_slot": 0, + "target_id": 4, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 33, + "origin_id": 18, + "origin_slot": 0, + "target_id": 17, + "target_slot": 0, + "type": "SIGMAS" + }, + { + "id": 146, + "origin_id": 68, + "origin_slot": 0, + "target_id": 17, + "target_slot": 1, + "type": "INT" + }, + { + "id": 127, + "origin_id": 5, + "origin_slot": 0, + "target_id": 11, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 31, + "origin_id": 15, + "origin_slot": 0, + "target_id": 16, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 99, + "origin_id": 7, + "origin_slot": 0, + "target_id": 16, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 133, + "origin_id": 52, + "origin_slot": 0, + "target_id": 19, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 121, + "origin_id": 50, + "origin_slot": 0, + "target_id": 19, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 123, + "origin_id": 50, + "origin_slot": 1, + "target_id": 19, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 53, + "origin_id": 27, + "origin_slot": 0, + "target_id": 19, + "target_slot": 3, + "type": "SAMPLER" + }, + { + "id": 41, + "origin_id": 17, + "origin_slot": 0, + "target_id": 19, + "target_slot": 4, + "type": "SIGMAS" + }, + { + "id": 125, + "origin_id": 50, + "origin_slot": 2, + "target_id": 19, + "target_slot": 5, + "type": "LATENT" + }, + { + "id": 153, + "origin_id": 72, + "origin_slot": 0, + "target_id": 19, + "target_slot": 8, + "type": "FLOAT" + }, + { + "id": 34, + "origin_id": 12, + "origin_slot": 0, + "target_id": 18, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 143, + "origin_id": 64, + "origin_slot": 0, + "target_id": 18, 
+ "target_slot": 2, + "type": "INT" + }, + { + "id": 134, + "origin_id": 53, + "origin_slot": 0, + "target_id": 15, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 122, + "origin_id": 50, + "origin_slot": 0, + "target_id": 15, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 124, + "origin_id": 50, + "origin_slot": 1, + "target_id": 15, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 56, + "origin_id": 27, + "origin_slot": 0, + "target_id": 15, + "target_slot": 3, + "type": "SAMPLER" + }, + { + "id": 42, + "origin_id": 17, + "origin_slot": 1, + "target_id": 15, + "target_slot": 4, + "type": "SIGMAS" + }, + { + "id": 40, + "origin_id": 19, + "origin_slot": 0, + "target_id": 15, + "target_slot": 5, + "type": "LATENT" + }, + { + "id": 154, + "origin_id": 72, + "origin_slot": 0, + "target_id": 15, + "target_slot": 8, + "type": "FLOAT" + }, + { + "id": 117, + "origin_id": 3, + "origin_slot": 0, + "target_id": 50, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 118, + "origin_id": 4, + "origin_slot": 0, + "target_id": 50, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 119, + "origin_id": 7, + "origin_slot": 0, + "target_id": 50, + "target_slot": 2, + "type": "VAE" + }, + { + "id": 7, + "origin_id": 9, + "origin_slot": 0, + "target_id": 3, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 140, + "origin_id": 59, + "origin_slot": 0, + "target_id": 3, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 131, + "origin_id": 12, + "origin_slot": 0, + "target_id": 53, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 132, + "origin_id": 29, + "origin_slot": 0, + "target_id": 53, + "target_slot": 1, + "type": "MODEL" + }, + { + "id": 148, + "origin_id": 70, + "origin_slot": 0, + "target_id": 53, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 135, + "origin_id": 54, + "origin_slot": 1, + "target_id": 57, + "target_slot": 1, + "type": "INT" + }, + { + "id": 137, + "origin_id": 57, + 
"origin_slot": 0, + "target_id": 59, + "target_slot": 0, + "type": "STRING" + }, + { + "id": 141, + "origin_id": 62, + "origin_slot": 0, + "target_id": 64, + "target_slot": 0, + "type": "INT" + }, + { + "id": 142, + "origin_id": 63, + "origin_slot": 0, + "target_id": 64, + "target_slot": 1, + "type": "INT" + }, + { + "id": 149, + "origin_id": 70, + "origin_slot": 0, + "target_id": 64, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 144, + "origin_id": 66, + "origin_slot": 0, + "target_id": 68, + "target_slot": 0, + "type": "INT" + }, + { + "id": 145, + "origin_id": 67, + "origin_slot": 0, + "target_id": 68, + "target_slot": 1, + "type": "INT" + }, + { + "id": 150, + "origin_id": 70, + "origin_slot": 0, + "target_id": 68, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 151, + "origin_id": 71, + "origin_slot": 0, + "target_id": 72, + "target_slot": 0, + "type": "FLOAT" + }, + { + "id": 152, + "origin_id": 73, + "origin_slot": 0, + "target_id": 72, + "target_slot": 1, + "type": "FLOAT" + }, + { + "id": 155, + "origin_id": 70, + "origin_slot": 0, + "target_id": 72, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 130, + "origin_id": 5, + "origin_slot": 0, + "target_id": 52, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 129, + "origin_id": 11, + "origin_slot": 0, + "target_id": 52, + "target_slot": 1, + "type": "MODEL" + }, + { + "id": 147, + "origin_id": 70, + "origin_slot": 0, + "target_id": 52, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 162, + "origin_id": -10, + "origin_slot": 1, + "target_id": 50, + "target_slot": 4, + "type": "IMAGE" + }, + { + "id": 163, + "origin_id": -10, + "origin_slot": 2, + "target_id": 50, + "target_slot": 5, + "type": "IMAGE" + }, + { + "id": 165, + "origin_id": -10, + "origin_slot": 4, + "target_id": 54, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 166, + "origin_id": -10, + "origin_slot": 5, + "target_id": 50, + "target_slot": 7, + "type": "INT" + }, + { + "id": 167, + 
"origin_id": -10, + "origin_slot": 6, + "target_id": 50, + "target_slot": 8, + "type": "INT" + }, + { + "id": 169, + "origin_id": -10, + "origin_slot": 7, + "target_id": 50, + "target_slot": 11, + "type": "INT" + }, + { + "id": 170, + "origin_id": -10, + "origin_slot": 8, + "target_id": 70, + "target_slot": 0, + "type": "BOOLEAN" + }, + { + "id": 171, + "origin_id": -10, + "origin_slot": 9, + "target_id": 5, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 173, + "origin_id": -10, + "origin_slot": 10, + "target_id": 12, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 174, + "origin_id": -10, + "origin_slot": 11, + "target_id": 11, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 175, + "origin_id": -10, + "origin_slot": 11, + "target_id": 29, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 176, + "origin_id": -10, + "origin_slot": 12, + "target_id": 9, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 177, + "origin_id": -10, + "origin_slot": 13, + "target_id": 7, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 283, + "origin_id": -10, + "origin_slot": 14, + "target_id": 19, + "target_slot": 7, + "type": "INT" + }, + { + "id": 294, + "origin_id": -10, + "origin_slot": 0, + "target_id": 50, + "target_slot": 3, + "type": "IMAGE" + }, + { + "id": 295, + "origin_id": -10, + "origin_slot": 15, + "target_id": 50, + "target_slot": 9, + "type": "INT" + }, + { + "id": 297, + "origin_id": 16, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 307, + "origin_id": 120, + "origin_slot": 0, + "target_id": 59, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 308, + "origin_id": -10, + "origin_slot": 3, + "target_id": 120, + "target_slot": 0, + "type": "STRING" + } + ], + "extra": {}, + "category": "Image generation and editing/Edit image", + "description": "Edits a single image using a text prompt, leveraging Bernini-R's latent semantic planning for changes like object addition, removal, or 
style transfer. Ideal for creative edits requiring precise semantic understanding, such as adding a snowman to a scene or altering an object's appearance." + }, + { + "id": "c39e0ea5-b767-460c-b394-b09703772fa6", + "version": 1, + "state": { + "lastGroupId": 9, + "lastNodeId": 157, + "lastLinkId": 308, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Select Per-Line Text by Index", + "description": "Selects one line from multiline text by zero-based index for batch or list-driven prompt workflows.", + "inputNode": { + "id": -10, + "bounding": [ + -990, + 8595, + 128, + 88 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 710, + 8585, + 128, + 68 + ] + }, + "inputs": [ + { + "id": "75417d82-a934-4ac9-b667-d8dcd5a3bfb3", + "name": "text_per_line", + "type": "STRING", + "linkIds": [ + 13 + ], + "localized_name": "text_per_line", + "pos": [ + -886, + 8619 + ] + }, + { + "id": "46e69a73-1804-4ca6-9175-31445bf0be96", + "name": "index", + "type": "INT", + "linkIds": [ + 14 + ], + "localized_name": "index", + "pos": [ + -886, + 8639 + ] + } + ], + "outputs": [ + { + "id": "e34e8ad1-84d2-4bd2-a460-eb7de6067c10", + "name": "selected_line", + "type": "STRING", + "linkIds": [ + 10 + ], + "localized_name": "selected_line", + "pos": [ + 734, + 8609 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 1, + "type": "PreviewAny", + "pos": [ + -500, + 8400 + ], + "size": [ + 230, + 180 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "source", + "name": "source", + "type": "*", + "link": 1 + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 6 + ] + } + ], + "properties": { + "Node name for S&R": "PreviewAny", + "cnr_id": "comfy-core", + "ver": "0.19.0", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + null, + null, + null + ] + }, + { + "id": 2, + "type": "RegexExtract", + "pos": [ + -240, + 
8740 + ], + "size": [ + 470, + 460 + ], + "flags": {}, + "order": 1, + "mode": 0, + "showAdvanced": false, + "inputs": [ + { + "localized_name": "string", + "name": "string", + "type": "STRING", + "widget": { + "name": "string" + }, + "link": 13 + }, + { + "localized_name": "regex_pattern", + "name": "regex_pattern", + "type": "STRING", + "widget": { + "name": "regex_pattern" + }, + "link": 9 + }, + { + "localized_name": "mode", + "name": "mode", + "type": "COMBO", + "widget": { + "name": "mode" + }, + "link": null + }, + { + "localized_name": "case_insensitive", + "name": "case_insensitive", + "type": "BOOLEAN", + "widget": { + "name": "case_insensitive" + }, + "link": null + }, + { + "localized_name": "multiline", + "name": "multiline", + "type": "BOOLEAN", + "widget": { + "name": "multiline" + }, + "link": null + }, + { + "localized_name": "dotall", + "name": "dotall", + "type": "BOOLEAN", + "widget": { + "name": "dotall" + }, + "link": null + }, + { + "localized_name": "group_index", + "name": "group_index", + "type": "INT", + "widget": { + "name": "group_index" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 10 + ] + } + ], + "properties": { + "Node name for S&R": "RegexExtract", + "cnr_id": "comfy-core", + "ver": "0.19.0", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "You are a helpful assistant.\nYou are a helpful assistant specialized in text-to-image generation.\nYou are a helpful assistant specialized in text-to-video generation.\nYou are a helpful assistant specialized in image editing.\nYou are a helpful assistant specialized in subject-to-image generation.", + "", + "First Group", + false, + false, + false, + 1 + ] + }, + { + "id": 56, + "type": "PrimitiveInt", + "pos": [ + -810, + 8400 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": 
"value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 14 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 1 + ] + } + ], + "title": "Int (line index)", + "properties": { + "Node name for S&R": "Int (line index)", + "cnr_id": "comfy-core", + "ver": "0.19.0", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + 0, + "fixed" + ] + }, + { + "id": 8, + "type": "StringReplace", + "pos": [ + -240, + 8400 + ], + "size": [ + 400, + 280 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "string", + "name": "string", + "type": "STRING", + "widget": { + "name": "string" + }, + "link": null + }, + { + "localized_name": "find", + "name": "find", + "type": "STRING", + "widget": { + "name": "find" + }, + "link": null + }, + { + "localized_name": "replace", + "name": "replace", + "type": "STRING", + "widget": { + "name": "replace" + }, + "link": 6 + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 9 + ] + } + ], + "properties": { + "Node name for S&R": "StringReplace", + "cnr_id": "comfy-core", + "ver": "0.19.0", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "^(?:[^\\n]*\\n){index}([^\\n]*)(?:\\n|$)", + "index", + "" + ] + } + ], + "groups": [], + "links": [ + { + "id": 1, + "origin_id": 56, + "origin_slot": 0, + "target_id": 1, + "target_slot": 0, + "type": "INT" + }, + { + "id": 9, + "origin_id": 8, + "origin_slot": 0, + "target_id": 2, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 6, + "origin_id": 1, + "origin_slot": 0, + "target_id": 8, + "target_slot": 2, + "type": "STRING" + }, + { + "id": 10, + "origin_id": 2, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "STRING" + }, + { + "id": 13, + "origin_id": -10, + "origin_slot": 0, 
+ "target_id": 2, + "target_slot": 0, + "type": "STRING" + }, + { + "id": 14, + "origin_id": -10, + "origin_slot": 1, + "target_id": 56, + "target_slot": 0, + "type": "INT" + } + ], + "extra": { + "ue_links": [], + "links_added_by_ue": [] + } + } + ] + }, + "extra": { + "BlueprintDescription": "Edits a single image using a text prompt, leveraging Bernini-R's latent semantic planning for changes like object addition, removal, or style transfer. Ideal for creative edits requiring precise semantic understanding, such as adding a snowman to a scene or altering an object's appearance." + } +} \ No newline at end of file diff --git a/blueprints/Image to Gaussian Splat (TripoSplat).json b/blueprints/Image to Gaussian Splat (TripoSplat).json new file mode 100644 index 000000000..13da12be3 --- /dev/null +++ b/blueprints/Image to Gaussian Splat (TripoSplat).json @@ -0,0 +1,1983 @@ +{ + "revision": 0, + "last_node_id": 118, + "last_link_id": 0, + "nodes": [ + { + "id": 118, + "type": "6c0b94f2-a3d2-4056-93f7-4072126afe39", + "pos": [ + 790, + 1900 + ], + "size": [ + 430, + 670 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": null + }, + { + "label": "auto_remove_background", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": null + }, + { + "label": "num_gaussians", + "name": "num_gaussians_1", + "type": "INT", + "widget": { + "name": "num_gaussians_1" + }, + "link": null + }, + { + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": null + }, + { + "label": "splat_vae", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + }, + { + "label": "flux2_vae", + "name": 
"vae_name_1", + "type": "COMBO", + "widget": { + "name": "vae_name_1" + }, + "link": null + }, + { + "label": "bg_removal_model", + "name": "bg_removal_name", + "type": "COMBO", + "widget": { + "name": "bg_removal_name" + }, + "link": null + }, + { + "label": "mask", + "name": "on_false", + "type": "MASK", + "link": null + }, + { + "label": "enable_preview", + "name": "switch_1", + "type": "BOOLEAN", + "widget": { + "name": "switch_1" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "splat", + "name": "splat", + "type": "SPLAT", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "103", + "switch" + ], + [ + "108", + "num_gaussians" + ], + [ + "102", + "seed" + ], + [ + "111", + "unet_name" + ], + [ + "105", + "clip_name" + ], + [ + "106", + "vae_name" + ], + [ + "107", + "vae_name" + ], + [ + "109", + "bg_removal_name" + ], + [ + "112", + "switch" + ], + [ + "101", + "$$canvas-image-preview" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.22.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [], + "title": "Image to Gaussian Splat (TripoSplat)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "6c0b94f2-a3d2-4056-93f7-4072126afe39", + "version": 1, + "state": { + "lastGroupId": 6, + "lastNodeId": 118, + "lastLinkId": 219, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Image to Gaussian Splat (TripoSplat)", + "inputNode": { + "id": -10, + "bounding": [ + -110, + 1050, + 194.666015625, + 268 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 2250, + 700, + 128, + 68 + ] + }, + "inputs": [ + { + "id": "736cc4fd-77da-4fb7-8db7-d94296654571", + "name": "image", + "type": "IMAGE", + "linkIds": [ + 165, + 195 + ], + "localized_name": "image", + "pos": [ + 60.666015625, + 1074 + ] + }, + { + "id": 
"bc100771-6c6a-4223-a6b1-62f8c7b1c5b6", + "name": "switch", + "type": "BOOLEAN", + "linkIds": [ + 184 + ], + "label": "auto_remove_background", + "pos": [ + 60.666015625, + 1094 + ] + }, + { + "id": "13a86171-bf5d-4328-ae51-116502beb274", + "name": "num_gaussians_1", + "type": "INT", + "linkIds": [ + 196 + ], + "label": "num_gaussians", + "pos": [ + 60.666015625, + 1114 + ] + }, + { + "id": "38ebeca8-4674-4a73-a686-f8b9444c7bb6", + "name": "seed", + "type": "INT", + "linkIds": [ + 197 + ], + "pos": [ + 60.666015625, + 1134 + ] + }, + { + "id": "6952068a-04ba-4e56-b702-0986f69b0d96", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 201 + ], + "pos": [ + 60.666015625, + 1154 + ] + }, + { + "id": "206819d5-2ecb-4788-8dcf-134138e46bbf", + "name": "clip_name", + "type": "COMBO", + "linkIds": [ + 202 + ], + "pos": [ + 60.666015625, + 1174 + ] + }, + { + "id": "83a8d3d1-4c75-4ce0-a03c-face70f57ac9", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 203 + ], + "label": "splat_vae", + "pos": [ + 60.666015625, + 1194 + ] + }, + { + "id": "dce14e7e-37ca-493b-8dbe-24333d7cee63", + "name": "vae_name_1", + "type": "COMBO", + "linkIds": [ + 204 + ], + "label": "flux2_vae", + "pos": [ + 60.666015625, + 1214 + ] + }, + { + "id": "d9c843e7-567b-4acc-813d-752579cb89b5", + "name": "bg_removal_name", + "type": "COMBO", + "linkIds": [ + 205 + ], + "label": "bg_removal_model", + "pos": [ + 60.666015625, + 1234 + ] + }, + { + "id": "07d672fb-6e35-44d8-b973-7577c3468208", + "name": "on_false", + "type": "MASK", + "linkIds": [ + 209 + ], + "label": "mask", + "pos": [ + 60.666015625, + 1254 + ] + }, + { + "id": "8ae4f3b4-e3d2-4d42-84fa-4e9af9ffc3e7", + "name": "switch_1", + "type": "BOOLEAN", + "linkIds": [ + 212 + ], + "label": "enable_preview", + "pos": [ + 60.666015625, + 1274 + ] + } + ], + "outputs": [ + { + "id": "4185ff5d-4179-4938-9fb6-cea3a8322606", + "name": "splat", + "type": "SPLAT", + "linkIds": [ + 156, + 156 + ], + "localized_name": "splat", + "pos": [ + 
2274, + 724 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 100, + "type": "TripoSplatConditioning", + "pos": [ + 940, + 800 + ], + "size": [ + 290, + 120 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_vision", + "name": "clip_vision", + "type": "CLIP_VISION", + "link": 45 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 46 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 47 + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "links": [ + 48 + ] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "links": [ + 49 + ] + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 54 + ] + } + ], + "properties": { + "Node name for S&R": "TripoSplatConditioning", + "cnr_id": "comfy-core", + "ver": "0.22.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 101, + "type": "PreviewImage", + "pos": [ + 1360, + 1290 + ], + "size": [ + 440, + 530 + ], + "flags": {}, + "order": 1, + "mode": 4, + "inputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": 38 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "PreviewImage", + "cnr_id": "comfy-core", + "ver": "0.22.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 102, + "type": "KSampler", + "pos": [ + 1350, + 570 + ], + "size": [ + 290, + 590 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 211 + }, + { + "localized_name": "positive", + "name": "positive", + "type": 
"CONDITIONING", + "link": 48 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 49 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 54 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": 197 + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 103 + ] + } + ], + "properties": { + "Node name for S&R": "KSampler", + "cnr_id": "comfy-core", + "ver": "0.22.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 46, + "fixed", + 20, + 3, + "dpmpp_2m", + "simple", + 1 + ] + }, + { + "id": 103, + "type": "ComfySwitchNode", + "pos": [ + 1020, + 1330 + ], + "size": [ + 270, + 130 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 208 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 170 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 184 + } + ], + 
"outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 95 + ] + } + ], + "title": "Switch: Mask Source", + "properties": { + "Node name for S&R": "ComfySwitchNode", + "cnr_id": "comfy-core", + "ver": "0.22.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + true + ] + }, + { + "id": 104, + "type": "TripoSplatPreprocessImage", + "pos": [ + 940, + 990 + ], + "size": [ + 300, + 160 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 195 + }, + { + "localized_name": "mask", + "name": "mask", + "type": "MASK", + "link": 95 + }, + { + "localized_name": "erode_radius", + "name": "erode_radius", + "type": "INT", + "widget": { + "name": "erode_radius" + }, + "link": null + }, + { + "localized_name": "size", + "name": "size", + "type": "INT", + "widget": { + "name": "size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "links": [ + 38, + 47 + ] + } + ], + "properties": { + "Node name for S&R": "TripoSplatPreprocessImage", + "cnr_id": "comfy-core", + "ver": "0.22.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1, + 1024 + ] + }, + { + "id": 105, + "type": "CLIPVisionLoader", + "pos": [ + 410, + 760 + ], + "size": [ + 420, + 140 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 202 + } + ], + "outputs": [ + { + "localized_name": "CLIP_VISION", + "name": "CLIP_VISION", + "type": "CLIP_VISION", + "links": [ + 45 + ] + } + ], + "properties": { + "Node 
name for S&R": "CLIPVisionLoader", + "cnr_id": "comfy-core", + "ver": "0.22.0", + "models": [ + { + "name": "dino_v3_vit_h.safetensors", + "url": "https://huggingface.co/VAST-AI/TripoSplat/resolve/main/clip_vision/dino_v3_vit_h.safetensors", + "directory": "clip_vision" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "dino_v3_vit_h.safetensors" + ] + }, + { + "id": 106, + "type": "VAELoader", + "pos": [ + 420, + 930 + ], + "size": [ + 410, + 110 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 203 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": [ + 104, + 216 + ] + } + ], + "properties": { + "Node name for S&R": "VAELoader", + "cnr_id": "comfy-core", + "ver": "0.22.0", + "models": [ + { + "name": "triposplat_vae_decoder_fp16.safetensors", + "url": "https://huggingface.co/VAST-AI/TripoSplat/resolve/main/vae/triposplat_vae_decoder_fp16.safetensors", + "directory": "vae" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "triposplat_vae_decoder_fp16.safetensors" + ] + }, + { + "id": 107, + "type": "VAELoader", + "pos": [ + 410, + 1120 + ], + "size": [ + 420, + 110 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 204 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": [ + 46 + ] + } + ], + "properties": { + "Node name for S&R": "VAELoader", + "cnr_id": "comfy-core", + "ver": "0.22.0", + 
"models": [ + { + "name": "flux2-vae.safetensors", + "url": "https://huggingface.co/VAST-AI/TripoSplat/resolve/main/vae/flux2-vae.safetensors", + "directory": "vae" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "flux2-vae.safetensors" + ] + }, + { + "id": 108, + "type": "VAEDecodeTripoSplat", + "pos": [ + 1730, + 570 + ], + "size": [ + 430, + 160 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 103 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 104 + }, + { + "localized_name": "num_gaussians", + "name": "num_gaussians", + "type": "INT", + "widget": { + "name": "num_gaussians" + }, + "link": 196 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "splat", + "name": "splat", + "type": "SPLAT", + "links": [ + 156 + ] + } + ], + "properties": { + "Node name for S&R": "VAEDecodeTripoSplat", + "cnr_id": "comfy-core", + "ver": "0.22.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 262144, + 790219963981395, + "fixed" + ] + }, + { + "id": 109, + "type": "e527b93c-76f7-485d-b285-fcf78914a4d5", + "pos": [ + 410, + 1330 + ], + "size": [ + 350, + 160 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 165 + }, + { + "name": "bg_removal_name", + "type": "COMBO", + "widget": { + "name": "bg_removal_name" + }, + "link": 205 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 166, + 194 + 
] + }, + { + "name": "mask", + "type": "MASK", + "links": [ + 170 + ] + } + ], + "properties": { + "proxyWidgets": [ + [ + "115", + "bg_removal_name" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.22.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 110, + "type": "InvertMask", + "pos": [ + 430, + 1540 + ], + "size": [ + 230, + 80 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "mask", + "name": "mask", + "type": "MASK", + "link": 209 + } + ], + "outputs": [ + { + "localized_name": "MASK", + "name": "MASK", + "type": "MASK", + "links": [ + 208 + ] + } + ], + "properties": { + "Node name for S&R": "InvertMask", + "cnr_id": "comfy-core", + "ver": "0.22.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 111, + "type": "UNETLoader", + "pos": [ + 410, + 560 + ], + "size": [ + 410, + 140 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 201 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 213, + 215 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader", + "cnr_id": "comfy-core", + "ver": "0.22.0", + "models": [ + { + "name": "triposplat_fp16.safetensors", + "url": "https://huggingface.co/VAST-AI/TripoSplat/resolve/main/diffusion_models/triposplat_fp16.safetensors", + "directory": "diffusion_models" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + 
"hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "triposplat_fp16.safetensors", + "default" + ] + }, + { + "id": 112, + "type": "ComfySwitchNode", + "pos": [ + 930, + 370 + ], + "size": [ + 300, + 140 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 213 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 217 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 212 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 211 + ] + } + ], + "properties": { + "Node name for S&R": "ComfySwitchNode", + "cnr_id": "comfy-core", + "ver": "0.22.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + true + ] + }, + { + "id": 113, + "type": "TripoSplatSamplingPreview", + "pos": [ + 940, + 560 + ], + "size": [ + 290, + 190 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 215 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 216 + }, + { + "localized_name": "octree_level", + "name": "octree_level", + "type": "INT", + "widget": { + "name": "octree_level" + }, + "link": null + }, + { + "localized_name": "num_gaussians", + "name": "num_gaussians", + "type": "INT", + "widget": { + "name": "num_gaussians" + }, + "link": null + }, + { + "localized_name": "yaw", + "name": "yaw", + "type": "FLOAT", + "widget": { + "name": "yaw" + }, + "link": null + }, + { + "localized_name": "pitch", + "name": "pitch", + "type": "FLOAT", + "widget": { + "name": "pitch" + }, + "link": null + }, + { + 
"localized_name": "point_size", + "name": "point_size", + "type": "INT", + "widget": { + "name": "point_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 217 + ] + } + ], + "properties": { + "Node name for S&R": "TripoSplatSamplingPreview", + "cnr_id": "comfy-core", + "ver": "0.22.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 5, + 16384, + 90, + 15, + 2 + ] + } + ], + "groups": [ + { + "id": 1, + "title": "Models", + "bounding": [ + 400, + 490, + 440, + 748.625 + ], + "color": "#3f789e", + "flags": {} + }, + { + "id": 2, + "title": "Image Preprocessing", + "bounding": [ + 400, + 1260, + 910, + 370 + ], + "color": "#3f789e", + "flags": {} + } + ], + "links": [ + { + "id": 45, + "origin_id": 105, + "origin_slot": 0, + "target_id": 100, + "target_slot": 0, + "type": "CLIP_VISION" + }, + { + "id": 46, + "origin_id": 107, + "origin_slot": 0, + "target_id": 100, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 47, + "origin_id": 104, + "origin_slot": 0, + "target_id": 100, + "target_slot": 2, + "type": "IMAGE" + }, + { + "id": 38, + "origin_id": 104, + "origin_slot": 0, + "target_id": 101, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 103, + "origin_id": 102, + "origin_slot": 0, + "target_id": 108, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 104, + "origin_id": 106, + "origin_slot": 0, + "target_id": 108, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 48, + "origin_id": 100, + "origin_slot": 0, + "target_id": 102, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 49, + "origin_id": 100, + "origin_slot": 1, + "target_id": 102, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 54, + "origin_id": 100, + "origin_slot": 2, + "target_id": 102, + "target_slot": 3, + "type": "LATENT" + }, + 
{ + "id": 170, + "origin_id": 109, + "origin_slot": 1, + "target_id": 103, + "target_slot": 1, + "type": "MASK" + }, + { + "id": 95, + "origin_id": 103, + "origin_slot": 0, + "target_id": 104, + "target_slot": 1, + "type": "MASK" + }, + { + "id": 165, + "origin_id": -10, + "origin_slot": 0, + "target_id": 109, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 156, + "origin_id": 108, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "SPLAT" + }, + { + "id": 184, + "origin_id": -10, + "origin_slot": 1, + "target_id": 103, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 195, + "origin_id": -10, + "origin_slot": 0, + "target_id": 104, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 196, + "origin_id": -10, + "origin_slot": 2, + "target_id": 108, + "target_slot": 2, + "type": "INT" + }, + { + "id": 197, + "origin_id": -10, + "origin_slot": 3, + "target_id": 102, + "target_slot": 4, + "type": "INT" + }, + { + "id": 201, + "origin_id": -10, + "origin_slot": 4, + "target_id": 111, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 202, + "origin_id": -10, + "origin_slot": 5, + "target_id": 105, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 203, + "origin_id": -10, + "origin_slot": 6, + "target_id": 106, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 204, + "origin_id": -10, + "origin_slot": 7, + "target_id": 107, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 205, + "origin_id": -10, + "origin_slot": 8, + "target_id": 109, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 208, + "origin_id": 110, + "origin_slot": 0, + "target_id": 103, + "target_slot": 0, + "type": "MASK" + }, + { + "id": 209, + "origin_id": -10, + "origin_slot": 9, + "target_id": 110, + "target_slot": 0, + "type": "MASK" + }, + { + "id": 211, + "origin_id": 112, + "origin_slot": 0, + "target_id": 102, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 212, + "origin_id": -10, + "origin_slot": 10, + "target_id": 112, + 
"target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 213, + "origin_id": 111, + "origin_slot": 0, + "target_id": 112, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 215, + "origin_id": 111, + "origin_slot": 0, + "target_id": 113, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 216, + "origin_id": 106, + "origin_slot": 0, + "target_id": 113, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 217, + "origin_id": 113, + "origin_slot": 0, + "target_id": 112, + "target_slot": 1, + "type": "MODEL" + } + ], + "extra": {}, + "category": "3D", + "description": "This subgraph takes a single 2D image as input and generates a variable number of 3D Gaussians (up to 262,144) as output, enabling high-quality 3D reconstruction. It is ideal for asset creation, AR/VR, game development, and simulation environments, handling diverse image styles from photos to illustrations." + }, + { + "id": "e527b93c-76f7-485d-b285-fcf78914a4d5", + "version": 1, + "state": { + "lastGroupId": 6, + "lastNodeId": 118, + "lastLinkId": 219, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Remove Background (BiRefNet)", + "description": "Removes or replaces image backgrounds using BiRefNet segmentation and alpha compositing.", + "inputNode": { + "id": -10, + "bounding": [ + -6810, + 1480, + 150.9140625, + 88 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + -6169.049695722246, + 1475.2619799128663, + 128, + 88 + ] + }, + "inputs": [ + { + "id": "7bc321cd-df31-4c39-aaf7-7f0d01326189", + "name": "image", + "type": "IMAGE", + "linkIds": [ + 5, + 7 + ], + "localized_name": "image", + "pos": [ + -6683.0859375, + 1504 + ] + }, + { + "id": "e89d2cd8-daa3-4e29-8a69-851db85072cb", + "name": "bg_removal_name", + "type": "COMBO", + "linkIds": [ + 12 + ], + "pos": [ + -6683.0859375, + 1524 + ] + } + ], + "outputs": [ + { + "id": "16e7863c-4c38-46c2-aa74-e82991fbfe8d", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 8 + ], + "localized_name": "IMAGE", + "pos": [ + 
-6145.049695722246, + 1499.2619799128663 + ] + }, + { + "id": "f7240c19-5b80-406e-a8e2-9b12440ee2d6", + "name": "mask", + "type": "MASK", + "linkIds": [ + 11 + ], + "pos": [ + -6145.049695722246, + 1519.2619799128663 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 114, + "type": "RemoveBackground", + "pos": [ + -6540, + 1440 + ], + "size": [ + 310, + 100 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "bg_removal_model", + "name": "bg_removal_model", + "type": "BACKGROUND_REMOVAL", + "link": 3 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 5 + } + ], + "outputs": [ + { + "localized_name": "mask", + "name": "mask", + "type": "MASK", + "links": [ + 4, + 11 + ] + } + ], + "properties": { + "Node name for S&R": "RemoveBackground", + "cnr_id": "comfy-core", + "ver": "0.22.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 115, + "type": "LoadBackgroundRemovalModel", + "pos": [ + -6540, + 1300 + ], + "size": [ + 320, + 110 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "bg_removal_name", + "name": "bg_removal_name", + "type": "COMBO", + "widget": { + "name": "bg_removal_name" + }, + "link": 12 + } + ], + "outputs": [ + { + "localized_name": "bg_model", + "name": "bg_model", + "type": "BACKGROUND_REMOVAL", + "links": [ + 3 + ] + } + ], + "properties": { + "Node name for S&R": "LoadBackgroundRemovalModel", + "cnr_id": "comfy-core", + "ver": "0.22.0", + "models": [ + { + "name": "birefnet.safetensors", + "url": "https://huggingface.co/Comfy-Org/BiRefNet/resolve/main/background_removal/birefnet.safetensors", + "directory": "background_removal" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + 
}, + "widgets_values": [ + "birefnet.safetensors" + ] + }, + { + "id": 116, + "type": "InvertMask", + "pos": [ + -6530, + 1570 + ], + "size": [ + 290, + 80 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "mask", + "name": "mask", + "type": "MASK", + "link": 4 + } + ], + "outputs": [ + { + "localized_name": "MASK", + "name": "MASK", + "type": "MASK", + "links": [ + 6 + ] + } + ], + "properties": { + "Node name for S&R": "InvertMask", + "cnr_id": "comfy-core", + "ver": "0.22.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 117, + "type": "JoinImageWithAlpha", + "pos": [ + -6530, + 1670 + ], + "size": [ + 290, + 100 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 7 + }, + { + "localized_name": "alpha", + "name": "alpha", + "type": "MASK", + "link": 6 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 8 + ] + } + ], + "properties": { + "Node name for S&R": "JoinImageWithAlpha", + "cnr_id": "comfy-core", + "ver": "0.22.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + } + ], + "groups": [], + "links": [ + { + "id": 3, + "origin_id": 115, + "origin_slot": 0, + "target_id": 114, + "target_slot": 0, + "type": "BACKGROUND_REMOVAL" + }, + { + "id": 4, + "origin_id": 114, + "origin_slot": 0, + "target_id": 116, + "target_slot": 0, + "type": "MASK" + }, + { + "id": 6, + "origin_id": 116, + "origin_slot": 0, + "target_id": 117, + "target_slot": 1, + "type": "MASK" + }, + { + "id": 5, + "origin_id": -10, + "origin_slot": 0, + "target_id": 114, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 7, + "origin_id": -10, + 
"origin_slot": 0, + "target_id": 117, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 8, + "origin_id": 117, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 11, + "origin_id": 114, + "origin_slot": 0, + "target_id": -20, + "target_slot": 1, + "type": "MASK" + }, + { + "id": 12, + "origin_id": -10, + "origin_slot": 1, + "target_id": 115, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": {} + } + ] + }, + "extra": { + "BlueprintDescription": "This subgraph takes a single 2D image as input and generates a variable number of 3D Gaussians (up to 262,144) as output, enabling high-quality 3D reconstruction. It is ideal for asset creation, AR/VR, game development, and simulation environments, handling diverse image styles from photos to illustrations." + } +} \ No newline at end of file diff --git a/blueprints/Text to Image (Anima Base 1.0).json b/blueprints/Text to Image (Anima Base 1.0).json new file mode 100644 index 000000000..379e7c605 --- /dev/null +++ b/blueprints/Text to Image (Anima Base 1.0).json @@ -0,0 +1,1088 @@ +{ + "revision": 0, + "last_node_id": 60, + "last_link_id": 0, + "nodes": [ + { + "id": 60, + "type": "a3c0dab6-b250-4585-a0f9-8fb8b074fb2f", + "pos": [ + -10, + 130 + ], + "size": [ + 500, + 640 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "label": "prompt", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + }, + { + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": null + }, + { + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": null + }, + { + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + }, + { + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", 
+ "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "title": "Text to Image (Anima Base 1.0)", + "properties": { + "proxyWidgets": [ + [ + "11", + "text" + ], + [ + "28", + "width" + ], + [ + "28", + "height" + ], + [ + "19", + "steps" + ], + [ + "19", + "cfg" + ], + [ + "19", + "seed" + ], + [ + "44", + "unet_name" + ], + [ + "45", + "clip_name" + ], + [ + "15", + "vae_name" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.18.1", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "a3c0dab6-b250-4585-a0f9-8fb8b074fb2f", + "version": 1, + "state": { + "lastGroupId": 3, + "lastNodeId": 70, + "lastLinkId": 104, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Text to Image (Anima Base 1.0)", + "inputNode": { + "id": -10, + "bounding": [ + -330, + 530, + 120, + 220 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1229.9999873482075, + 505, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "4693f350-6ba0-446d-80d4-3038c661d26c", + "name": "text", + "type": "STRING", + "linkIds": [ + 95 + ], + "label": "prompt", + "pos": [ + -230, + 550 + ] + }, + { + "id": "4a7886a9-4ed7-49bb-afc2-977bb78a303d", + "name": "width", + "type": "INT", + "linkIds": [ + 96 + ], + "pos": [ + -230, + 570 + ] + }, + { + "id": "f6c04461-d29e-49e3-8790-07bb662bbbfe", + "name": "height", + "type": "INT", + "linkIds": [ + 97 + ], + "pos": [ + -230, + 590 + ] + }, + { + "id": "7a24f998-3808-4837-8bff-52304ad09fb6", + 
"name": "steps", + "type": "INT", + "linkIds": [ + 98 + ], + "pos": [ + -230, + 610 + ] + }, + { + "id": "aaa99698-b222-40fe-b946-28067528a85c", + "name": "cfg", + "type": "FLOAT", + "linkIds": [ + 99 + ], + "pos": [ + -230, + 630 + ] + }, + { + "id": "053df9ae-7311-4816-aa23-7fa13c656ced", + "name": "seed", + "type": "INT", + "linkIds": [ + 100 + ], + "pos": [ + -230, + 650 + ] + }, + { + "id": "c59194ea-015c-41a7-8edd-ae7ffc220b63", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 101 + ], + "pos": [ + -230, + 670 + ] + }, + { + "id": "e655aa3b-2db7-4e25-9ea2-61550fa7ae2d", + "name": "clip_name", + "type": "COMBO", + "linkIds": [ + 102 + ], + "pos": [ + -230, + 690 + ] + }, + { + "id": "94965a7a-74dd-4f5a-87e3-9f87995d554f", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 103 + ], + "pos": [ + -230, + 710 + ] + } + ], + "outputs": [ + { + "id": "ef85ac0a-2152-4232-bfa1-929cfc913718", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 82 + ], + "localized_name": "IMAGE", + "pos": [ + 1249.9999873482075, + 525 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 45, + "type": "CLIPLoader", + "pos": [ + -60, + 380 + ], + "size": [ + 310, + 150 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 102 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 80, + 81 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPLoader", + "cnr_id": "comfy-core", + "ver": "0.11.0", + "models": [ + { + "name": "qwen_3_06b_base.safetensors", + "url": 
"https://huggingface.co/circlestone-labs/Anima/resolve/main/split_files/text_encoders/qwen_3_06b_base.safetensors", + "directory": "text_encoders" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "qwen_3_06b_base.safetensors", + "stable_diffusion", + "default" + ] + }, + { + "id": 15, + "type": "VAELoader", + "pos": [ + -50, + 610 + ], + "size": [ + 310, + 100 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 103 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": [ + 11 + ] + } + ], + "properties": { + "Node name for S&R": "VAELoader", + "cnr_id": "comfy-core", + "ver": "0.3.40", + "models": [ + { + "name": "qwen_image_vae.safetensors", + "url": "https://huggingface.co/circlestone-labs/Anima/resolve/main/split_files/vae/qwen_image_vae.safetensors", + "directory": "vae" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "qwen_image_vae.safetensors" + ] + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 880, + 840 + ], + "size": [ + 230, + 90 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 10 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 11 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 82 + ] + } + ], + "properties": { + "Node name for S&R": "VAEDecode", + "cnr_id": "comfy-core", + "ver": "0.3.40", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + 
"hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 28, + "type": "EmptyLatentImage", + "pos": [ + -50, + 830 + ], + "size": [ + 310, + 150 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 96 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 97 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 78 + ] + } + ], + "properties": { + "Node name for S&R": "EmptyLatentImage", + "cnr_id": "comfy-core", + "ver": "0.3.40", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1024, + 1024, + 1 + ] + }, + { + "id": 12, + "type": "CLIPTextEncode", + "pos": [ + 330, + 830 + ], + "size": [ + 490, + 140 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 81 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 40 + ] + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode", + "cnr_id": "comfy-core", + "ver": "0.3.65", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "worst quality, low 
quality, score_1, score_2, score_3, blurry, jpeg artifacts, sepia" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 19, + "type": "KSampler", + "pos": [ + 870, + 120 + ], + "size": [ + 300, + 620 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 79 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 39 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 40 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 78 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": 100 + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": 98 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": 99 + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 10 + ] + } + ], + "properties": { + "Node name for S&R": "KSampler", + "cnr_id": "comfy-core", + "ver": "0.3.40", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 875817230929465, + "fixed", + 30, + 4, + "er_sde", + "simple", + 1 + ] + }, + { + "id": 11, + "type": 
"CLIPTextEncode", + "pos": [ + 320, + 170 + ], + "size": [ + 490, + 610 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 80 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 95 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 39 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode", + "cnr_id": "comfy-core", + "ver": "0.3.65", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 44, + "type": "UNETLoader", + "pos": [ + -50, + 170 + ], + "size": [ + 310, + 130 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 101 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 79 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader", + "cnr_id": "comfy-core", + "ver": "0.11.0", + "models": [ + { + "name": "anima-base-v1.0.safetensors", + "url": "https://huggingface.co/circlestone-labs/Anima/resolve/main/split_files/diffusion_models/anima-base-v1.0.safetensors", + "directory": "diffusion_models" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 
"anima-base-v1.0.safetensors", + "default" + ] + } + ], + "groups": [ + { + "id": 1, + "title": "Model", + "bounding": [ + -80, + 80, + 360, + 640 + ], + "color": "#3f789e", + "flags": {} + }, + { + "id": 2, + "title": "Image Size(1MP)", + "bounding": [ + -80, + 750, + 360, + 240 + ], + "color": "#3f789e", + "flags": {} + }, + { + "id": 3, + "title": "Prompt", + "bounding": [ + 300, + 80, + 530, + 910 + ], + "color": "#3f789e", + "flags": {} + } + ], + "links": [ + { + "id": 10, + "origin_id": 19, + "origin_slot": 0, + "target_id": 8, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 11, + "origin_id": 15, + "origin_slot": 0, + "target_id": 8, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 81, + "origin_id": 45, + "origin_slot": 0, + "target_id": 12, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 79, + "origin_id": 44, + "origin_slot": 0, + "target_id": 19, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 39, + "origin_id": 11, + "origin_slot": 0, + "target_id": 19, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 40, + "origin_id": 12, + "origin_slot": 0, + "target_id": 19, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 78, + "origin_id": 28, + "origin_slot": 0, + "target_id": 19, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 80, + "origin_id": 45, + "origin_slot": 0, + "target_id": 11, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 82, + "origin_id": 8, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 95, + "origin_id": -10, + "origin_slot": 0, + "target_id": 11, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 96, + "origin_id": -10, + "origin_slot": 1, + "target_id": 28, + "target_slot": 0, + "type": "INT" + }, + { + "id": 97, + "origin_id": -10, + "origin_slot": 2, + "target_id": 28, + "target_slot": 1, + "type": "INT" + }, + { + "id": 98, + "origin_id": -10, + "origin_slot": 3, + "target_id": 19, + "target_slot": 5, + "type": "INT" + }, + 
{ + "id": 99, + "origin_id": -10, + "origin_slot": 4, + "target_id": 19, + "target_slot": 6, + "type": "FLOAT" + }, + { + "id": 100, + "origin_id": -10, + "origin_slot": 5, + "target_id": 19, + "target_slot": 4, + "type": "INT" + }, + { + "id": 101, + "origin_id": -10, + "origin_slot": 6, + "target_id": 44, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 102, + "origin_id": -10, + "origin_slot": 7, + "target_id": 45, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 103, + "origin_id": -10, + "origin_slot": 8, + "target_id": 15, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": {}, + "category": "Image generation and editing/Text to image", + "description": "This subgraph generates non-photorealistic illustrations from text prompts using a 2-billion-parameter model optimized for anime concepts, characters, and styles. It is ideal for creating artistic images, concept art, or stylized illustrations where photorealism is not required. The model excels with anime and artistic content but performs poorly on realistic subjects." + } + ] + }, + "extra": { + "BlueprintDescription": "This subgraph generates non-photorealistic illustrations from text prompts using a 2-billion-parameter model optimized for anime concepts, characters, and styles. It is ideal for creating artistic images, concept art, or stylized illustrations where photorealism is not required. The model excels with anime and artistic content but performs poorly on realistic subjects." 
+ } +} \ No newline at end of file diff --git a/blueprints/Text to Image (Anima).json b/blueprints/Text to Image (Anima).json index 787908ca9..dcf6e5973 100644 --- a/blueprints/Text to Image (Anima).json +++ b/blueprints/Text to Image (Anima).json @@ -1077,9 +1077,12 @@ } ], "extra": {}, - "category": "Image generation and editing/Text to image" + "category": "Image generation and editing/Text to image", + "description": "This subgraph converts text prompts into non-photorealistic illustrations using a 2-billion-parameter model optimized for anime and artistic styles. It is ideal for generating concept art, character designs, or stylized illustrations where photorealism is not required. The model excels with anime and artistic content but performs poorly on realistic subjects." } ] }, - "extra": {} + "extra": { + "BlueprintDescription": "This subgraph converts text prompts into non-photorealistic illustrations using a 2-billion-parameter model optimized for anime and artistic styles. It is ideal for generating concept art, character designs, or stylized illustrations where photorealism is not required. The model excels with anime and artistic content but performs poorly on realistic subjects." 
+ } } \ No newline at end of file diff --git a/blueprints/Text to Image (Ideogram v4).json b/blueprints/Text to Image (Ideogram v4).json new file mode 100644 index 000000000..0d5c1d7c7 --- /dev/null +++ b/blueprints/Text to Image (Ideogram v4).json @@ -0,0 +1,2473 @@ +{ + "revision": 0, + "last_node_id": 204, + "last_link_id": 0, + "nodes": [ + { + "id": 204, + "type": "3aa9dcf6-e101-4b91-abee-6d7a7c10023b", + "pos": [ + 6380, + 1410 + ], + "size": [ + 440, + 690 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "label": "prompt", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + }, + { + "label": "width", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + }, + { + "label": "height", + "name": "value_1", + "type": "INT", + "widget": { + "name": "value_1" + }, + "link": null + }, + { + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + }, + { + "label": "unconditional_unet", + "name": "unet_name_1", + "type": "COMBO", + "widget": { + "name": "unet_name_1" + }, + "link": null + }, + { + "label": "mode", + "name": "choice", + "type": "COMBO", + "widget": { + "name": "choice" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "186", + "text" + ], + [ + "188", + "value" + ], + [ + "189", + "value" + ], + [ + "184", + "noise_seed" + ], + [ + "185", + "unet_name" + ], + [ + "187", + "clip_name" + ], + [ + "177", + "vae_name" + ], + [ + "200", + "unet_name" + ], + [ + "202", + "choice" + ] 
+ ], + "cnr_id": "comfy-core", + "ver": "0.23.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [], + "title": "Text to Image (Ideogram v4)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "3aa9dcf6-e101-4b91-abee-6d7a7c10023b", + "version": 1, + "state": { + "lastGroupId": 9, + "lastNodeId": 204, + "lastLinkId": 252, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Text to Image (Ideogram v4)", + "inputNode": { + "id": -10, + "bounding": [ + 3490, + 920, + 154.921875, + 228 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 6850, + 936, + 128, + 68 + ] + }, + "inputs": [ + { + "id": "4bc742d1-7b4b-452c-90d9-0d76ebcdae76", + "name": "text", + "type": "STRING", + "linkIds": [ + 152 + ], + "label": "prompt", + "pos": [ + 3620.921875, + 944 + ] + }, + { + "id": "8d4038eb-73c7-45e9-bba1-f068f55e8d32", + "name": "value", + "type": "INT", + "linkIds": [ + 153 + ], + "label": "width", + "pos": [ + 3620.921875, + 964 + ] + }, + { + "id": "281550e6-6acf-4cbe-aec1-9eb803b4dec1", + "name": "value_1", + "type": "INT", + "linkIds": [ + 154 + ], + "label": "height", + "pos": [ + 3620.921875, + 984 + ] + }, + { + "id": "fae56884-2f1a-470b-a25f-40e7a87ef69d", + "name": "noise_seed", + "type": "INT", + "linkIds": [ + 155 + ], + "pos": [ + 3620.921875, + 1004 + ] + }, + { + "id": "3497309c-a7d7-4e28-9330-142c15881632", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 156 + ], + "pos": [ + 3620.921875, + 1024 + ] + }, + { + "id": "e87126db-7147-465e-b129-370ed2c6cc22", + "name": "clip_name", + "type": "COMBO", + "linkIds": [ + 157 + ], + "pos": [ + 3620.921875, + 1044 + ] + }, + { + "id": "a1e6c080-b11b-4d5c-a3a8-fcf4df654cf7", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 158 + ], + "pos": [ + 3620.921875, + 1064 + ] + }, + { + "id": 
"b0d16516-95de-44d9-bea8-3cd2e7c78e9a", + "name": "unet_name_1", + "type": "COMBO", + "linkIds": [ + 216 + ], + "label": "unconditional_unet", + "pos": [ + 3620.921875, + 1084 + ] + }, + { + "id": "249fd825-e6b3-489d-a341-6d8050500f5e", + "name": "choice", + "type": "COMBO", + "linkIds": [ + 219 + ], + "label": "mode", + "pos": [ + 3620.921875, + 1104 + ] + } + ], + "outputs": [ + { + "id": "b81e4f60-e543-4f02-875b-b0f1bdc274f2", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 25 + ], + "localized_name": "IMAGE", + "pos": [ + 6874, + 960 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 177, + "type": "VAELoader", + "pos": [ + 4730, + 1220 + ], + "size": [ + 470, + 110 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 158 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": [ + 17 + ] + } + ], + "properties": { + "Node name for S&R": "VAELoader", + "cnr_id": "comfy-core", + "ver": "0.8.2", + "models": [ + { + "name": "flux2-vae.safetensors", + "url": "https://huggingface.co/Comfy-Org/flux2-dev/resolve/main/split_files/vae/flux2-vae.safetensors", + "directory": "vae" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "flux2-vae.safetensors" + ] + }, + { + "id": 178, + "type": "ConditioningZeroOut", + "pos": [ + 5450, + 960 + ], + "size": [ + 250, + 80 + ], + "flags": { + "collapsed": false + }, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 8 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 214 + ] + } + ], + "properties": { + "Node name for S&R": 
"ConditioningZeroOut", + "cnr_id": "comfy-core", + "ver": "0.9.1", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 179, + "type": "EmptyFlux2LatentImage", + "pos": [ + 5330, + 1180 + ], + "size": [ + 270, + 170 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 33 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 36 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 15 + ] + } + ], + "properties": { + "Node name for S&R": "EmptyFlux2LatentImage", + "cnr_id": "comfy-core", + "ver": "0.8.2", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1024, + 1024, + 1 + ] + }, + { + "id": 180, + "type": "SamplerCustomAdvanced", + "pos": [ + 6160, + 500 + ], + "size": [ + 290, + 170 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "noise", + "name": "noise", + "type": "NOISE", + "link": 11 + }, + { + "localized_name": "guider", + "name": "guider", + "type": "GUIDER", + "link": 215 + }, + { + "localized_name": "sampler", + "name": "sampler", + "type": "SAMPLER", + "link": 13 + }, + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "SIGMAS", + "link": 14 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 15 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "LATENT", + 
"links": [ + 16 + ] + }, + { + "localized_name": "denoised_output", + "name": "denoised_output", + "type": "LATENT", + "links": [] + } + ], + "properties": { + "Node name for S&R": "SamplerCustomAdvanced", + "cnr_id": "comfy-core", + "ver": "0.8.2", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 181, + "type": "VAEDecode", + "pos": [ + 6560, + 500 + ], + "size": [ + 230, + 100 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 16 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 17 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 25 + ] + } + ], + "properties": { + "Node name for S&R": "VAEDecode", + "cnr_id": "comfy-core", + "ver": "0.8.2", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 182, + "type": "KSamplerSelect", + "pos": [ + 5790, + 1100 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SAMPLER", + "name": "SAMPLER", + "type": "SAMPLER", + "links": [ + 13 + ] + } + ], + "properties": { + "Node name for S&R": "KSamplerSelect", + "cnr_id": "comfy-core", + "ver": "0.8.2", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "euler" + ] + }, + { + "id": 183, + "type": "Ideogram4Scheduler", + "pos": [ + 5790, + 1260 
+ ], + "size": [ + 270, + 240 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": 207 + }, + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 34 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 37 + }, + { + "localized_name": "mu", + "name": "mu", + "type": "FLOAT", + "widget": { + "name": "mu" + }, + "link": 208 + }, + { + "localized_name": "std", + "name": "std", + "type": "FLOAT", + "widget": { + "name": "std" + }, + "link": 209 + } + ], + "outputs": [ + { + "localized_name": "SIGMAS", + "name": "SIGMAS", + "type": "SIGMAS", + "links": [ + 14 + ] + } + ], + "properties": { + "Node name for S&R": "Ideogram4Scheduler", + "cnr_id": "comfy-core", + "ver": "0.23.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 20, + 1024, + 1024, + 0.5, + 1.75 + ] + }, + { + "id": 184, + "type": "RandomNoise", + "pos": [ + 5780, + 490 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "noise_seed", + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": 155 + } + ], + "outputs": [ + { + "localized_name": "NOISE", + "name": "NOISE", + "type": "NOISE", + "links": [ + 11 + ] + } + ], + "properties": { + "Node name for S&R": "RandomNoise", + "cnr_id": "comfy-core", + "ver": "0.8.2", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 885894517601261, + "randomize" + ] + }, + { + "id": 185, + "type": "UNETLoader", + "pos": [ + 4720, + 520 + 
], + "size": [ + 470, + 170 + ], + "flags": {}, + "order": 8, + "mode": 0, + "showAdvanced": true, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 156 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 222 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader", + "cnr_id": "comfy-core", + "ver": "0.8.2", + "models": [ + { + "name": "ideogram4_fp8_scaled.safetensors", + "url": "https://huggingface.co/Comfy-Org/Ideogram-4/resolve/main/diffusion_models/ideogram4_fp8_scaled.safetensors", + "directory": "diffusion_models" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "ideogram4_fp8_scaled.safetensors", + "default" + ] + }, + { + "id": 186, + "type": "CLIPTextEncode", + "pos": [ + 5270, + 500 + ], + "size": [ + 430, + 420 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 24 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 152 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 8, + 213 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode", + "cnr_id": "comfy-core", + "ver": "0.8.2", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ] + }, + 
{ + "id": 187, + "type": "CLIPLoader", + "pos": [ + 4730, + 990 + ], + "size": [ + 470, + 170 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 157 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 24 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPLoader", + "cnr_id": "comfy-core", + "ver": "0.8.2", + "models": [ + { + "name": "qwen3vl_8b_fp8_scaled.safetensors", + "url": "https://huggingface.co/Comfy-Org/Qwen3-VL/resolve/main/text_encoders/qwen3vl_8b_fp8_scaled.safetensors", + "directory": "text_encoders" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "qwen3vl_8b_fp8_scaled.safetensors", + "ideogram4", + "default" + ] + }, + { + "id": 188, + "type": "PrimitiveInt", + "pos": [ + 4240, + 1610 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 153 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 32 + ] + } + ], + "title": "Int (Width)", + "properties": { + "Node name for S&R": "PrimitiveInt", + "cnr_id": "comfy-core", + "ver": "0.23.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1024, 
+ "fixed" + ] + }, + { + "id": 189, + "type": "PrimitiveInt", + "pos": [ + 4250, + 1800 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 154 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 35 + ] + } + ], + "title": "Int (Height)", + "properties": { + "Node name for S&R": "PrimitiveInt", + "cnr_id": "comfy-core", + "ver": "0.23.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1024, + "fixed" + ] + }, + { + "id": 190, + "type": "ComfyMathExpression", + "pos": [ + 5340, + 1400 + ], + "size": [ + 230, + 80 + ], + "flags": { + "collapsed": true + }, + "order": 13, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT,BOOLEAN", + "link": 32 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT,BOOLEAN", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 33, + 34 + ] + }, + { + "localized_name": "BOOL", + "name": "BOOL", + "type": "BOOLEAN", + "links": null + } + ], + "properties": { + "Node name for S&R": "ComfyMathExpression", + "cnr_id": "comfy-core", + "ver": "0.23.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "max(((a + 15) // 16) * 
16, 256)" + ] + }, + { + "id": 191, + "type": "ComfyMathExpression", + "pos": [ + 5350, + 1470 + ], + "size": [ + 230, + 80 + ], + "flags": { + "collapsed": true + }, + "order": 14, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT,BOOLEAN", + "link": 35 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT,BOOLEAN", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 36, + 37 + ] + }, + { + "localized_name": "BOOL", + "name": "BOOL", + "type": "BOOLEAN", + "links": null + } + ], + "properties": { + "Node name for S&R": "ComfyMathExpression", + "cnr_id": "comfy-core", + "ver": "0.23.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "max(((a + 15) // 16) * 16, 256)" + ] + }, + { + "id": 192, + "type": "ComfyNumberConvert", + "pos": [ + 5920, + 1870 + ], + "size": [ + 230, + 100 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "label": "value", + "localized_name": "value", + "name": "value", + "type": "INT,FLOAT,STRING,BOOLEAN", + "link": 195 + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 208 + ] + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": null + } + ], + "properties": { + "Node name for S&R": "ComfyNumberConvert", + "cnr_id": "comfy-core", + "ver": "0.23.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + 
"secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 193, + "type": "JsonExtractString", + "pos": [ + 5450, + 1870 + ], + "size": [ + 400, + 200 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "localized_name": "json_string", + "name": "json_string", + "type": "STRING", + "widget": { + "name": "json_string" + }, + "link": 196 + }, + { + "localized_name": "key", + "name": "key", + "type": "STRING", + "widget": { + "name": "key" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 195 + ] + } + ], + "properties": { + "Node name for S&R": "JsonExtractString", + "cnr_id": "comfy-core", + "ver": "0.23.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "", + "mu" + ] + }, + { + "id": 194, + "type": "ComfyNumberConvert", + "pos": [ + 5930, + 2110 + ], + "size": [ + 230, + 100 + ], + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + "label": "value", + "localized_name": "value", + "name": "value", + "type": "INT,FLOAT,STRING,BOOLEAN", + "link": 197 + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 209 + ] + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": null + } + ], + "properties": { + "Node name for S&R": "ComfyNumberConvert", + "cnr_id": "comfy-core", + "ver": "0.23.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 195, + "type": "JsonExtractString", + "pos": [ + 5010, + 1630 + ], + "size": [ + 410, + 470 + ], + "flags": {}, + "order": 18, + "mode": 0, + "inputs": [ + { + "localized_name": "json_string", + "name": "json_string", + "type": "STRING", + "widget": { + "name": 
"json_string" + }, + "link": null + }, + { + "localized_name": "key", + "name": "key", + "type": "STRING", + "widget": { + "name": "key" + }, + "link": 218 + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 199 + ] + } + ], + "properties": { + "Node name for S&R": "JsonExtractString", + "cnr_id": "comfy-core", + "ver": "0.23.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "{\n \"Quality\": {\n \"num_steps\": 48,\n \"mu\": 0.0,\n \"std\": 1.5,\n \"preset_id\": \"V4_QUALITY_48\"\n },\n \"Default\": {\n \"num_steps\": 20,\n \"mu\": 0.0,\n \"std\": 1.75,\n \"preset_id\": \"V4_DEFAULT_20\"\n },\n \"Turbo\": {\n \"num_steps\": 12,\n \"mu\": 0.5,\n \"std\": 1.75,\n \"preset_id\": \"V4_TURBO_12\"\n }\n}", + "Default" + ] + }, + { + "id": 196, + "type": "StringReplace", + "pos": [ + 5050, + 2150 + ], + "size": [ + 230, + 40 + ], + "flags": { + "collapsed": true + }, + "order": 19, + "mode": 0, + "inputs": [ + { + "localized_name": "string", + "name": "string", + "type": "STRING", + "widget": { + "name": "string" + }, + "link": 199 + }, + { + "localized_name": "find", + "name": "find", + "type": "STRING", + "widget": { + "name": "find" + }, + "link": null + }, + { + "localized_name": "replace", + "name": "replace", + "type": "STRING", + "widget": { + "name": "replace" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 196, + 200, + 201 + ] + } + ], + "properties": { + "Node name for S&R": "StringReplace", + "cnr_id": "comfy-core", + "ver": "0.23.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "", + "'", + "\"" + ] + }, + { + "id": 197, + 
"type": "JsonExtractString", + "pos": [ + 5460, + 1610 + ], + "size": [ + 400, + 200 + ], + "flags": {}, + "order": 20, + "mode": 0, + "inputs": [ + { + "localized_name": "json_string", + "name": "json_string", + "type": "STRING", + "widget": { + "name": "json_string" + }, + "link": 200 + }, + { + "localized_name": "key", + "name": "key", + "type": "STRING", + "widget": { + "name": "key" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 202 + ] + } + ], + "properties": { + "Node name for S&R": "JsonExtractString", + "cnr_id": "comfy-core", + "ver": "0.23.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "", + "num_steps" + ] + }, + { + "id": 198, + "type": "JsonExtractString", + "pos": [ + 5450, + 2110 + ], + "size": [ + 400, + 200 + ], + "flags": {}, + "order": 21, + "mode": 0, + "inputs": [ + { + "localized_name": "json_string", + "name": "json_string", + "type": "STRING", + "widget": { + "name": "json_string" + }, + "link": 201 + }, + { + "localized_name": "key", + "name": "key", + "type": "STRING", + "widget": { + "name": "key" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 197 + ] + } + ], + "properties": { + "Node name for S&R": "JsonExtractString", + "cnr_id": "comfy-core", + "ver": "0.23.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "", + "std" + ] + }, + { + "id": 199, + "type": "ComfyNumberConvert", + "pos": [ + 5920, + 1620 + ], + "size": [ + 230, + 100 + ], + "flags": {}, + "order": 22, + "mode": 0, + "inputs": [ + { + "label": "value", + "localized_name": "value", + "name": "value", + 
"type": "INT,FLOAT,STRING,BOOLEAN", + "link": 202 + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [] + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 207 + ] + } + ], + "properties": { + "Node name for S&R": "ComfyNumberConvert", + "cnr_id": "comfy-core", + "ver": "0.23.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 200, + "type": "UNETLoader", + "pos": [ + 4730, + 740 + ], + "size": [ + 470, + 170 + ], + "flags": {}, + "order": 23, + "mode": 0, + "showAdvanced": true, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 216 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 211 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader", + "cnr_id": "comfy-core", + "ver": "0.8.2", + "models": [ + { + "name": "ideogram4_unconditional_fp8_scaled.safetensors", + "url": "https://huggingface.co/Comfy-Org/Ideogram-4/resolve/main/diffusion_models/ideogram4_unconditional_fp8_scaled.safetensors", + "directory": "diffusion_models" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "ideogram4_unconditional_fp8_scaled.safetensors", + "default" + ] + }, + { + "id": 201, + "type": "DualModelGuider", + "pos": [ + 5790, + 870 + ], + "size": [ + 270, + 180 + ], + "flags": {}, + "order": 24, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 
223 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 213 + }, + { + "localized_name": "model_negative", + "name": "model_negative", + "shape": 7, + "type": "MODEL", + "link": 211 + }, + { + "localized_name": "negative", + "name": "negative", + "shape": 7, + "type": "CONDITIONING", + "link": 214 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "GUIDER", + "name": "GUIDER", + "type": "GUIDER", + "links": [ + 215 + ] + } + ], + "properties": { + "Node name for S&R": "DualModelGuider", + "cnr_id": "comfy-core", + "ver": "0.23.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 7 + ] + }, + { + "id": 202, + "type": "CustomCombo", + "pos": [ + 4720, + 1630 + ], + "size": [ + 270, + 280 + ], + "flags": {}, + "order": 25, + "mode": 0, + "inputs": [ + { + "localized_name": "choice", + "name": "choice", + "type": "COMBO", + "widget": { + "name": "choice" + }, + "link": 219 + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 218 + ] + }, + { + "localized_name": "INDEX", + "name": "INDEX", + "type": "INT", + "links": null + } + ], + "properties": { + "Node name for S&R": "CustomCombo", + "cnr_id": "comfy-core", + "ver": "0.23.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "Default", + 1, + "Quality", + "Default", + "Turbo", + "" + ] + }, + { + "id": 203, + "type": "CFGOverride", + "pos": [ + 5790, + 650 + ], + "size": [ + 260, + 170 + ], + "flags": {}, + "order": 26, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": 
"MODEL", + "link": 222 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + }, + { + "localized_name": "start_percent", + "name": "start_percent", + "type": "FLOAT", + "widget": { + "name": "start_percent" + }, + "link": null + }, + { + "localized_name": "end_percent", + "name": "end_percent", + "type": "FLOAT", + "widget": { + "name": "end_percent" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 223 + ] + } + ], + "properties": { + "Node name for S&R": "CFGOverride", + "cnr_id": "comfy-core", + "ver": "0.23.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 3, + 0.7, + 1 + ] + } + ], + "groups": [ + { + "id": 1, + "title": "Models", + "bounding": [ + 4700, + 420, + 530, + 1100 + ], + "color": "#3f789e", + "flags": {} + }, + { + "id": 2, + "title": "Latent Size", + "bounding": [ + 5260, + 1080, + 450, + 440 + ], + "color": "#3f789e", + "flags": {} + }, + { + "id": 3, + "title": "Sampling", + "bounding": [ + 5740, + 420, + 780, + 1100 + ], + "color": "#3f789e", + "flags": {} + }, + { + "id": 4, + "title": "Prompt", + "bounding": [ + 5260, + 420, + 450, + 640 + ], + "color": "#3f789e", + "flags": {} + }, + { + "id": 5, + "title": "Image Size", + "bounding": [ + 4130, + 1540, + 530, + 420 + ], + "color": "#3f789e", + "flags": {} + }, + { + "id": 9, + "title": "Preset", + "bounding": [ + 4700, + 1540, + 1820, + 780 + ], + "color": "#3f789e", + "flags": {} + } + ], + "links": [ + { + "id": 8, + "origin_id": 186, + "origin_slot": 0, + "target_id": 178, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 33, + "origin_id": 190, + "origin_slot": 1, + "target_id": 179, + "target_slot": 0, + "type": "INT" + }, + { + "id": 36, + "origin_id": 191, + "origin_slot": 1, + 
"target_id": 179, + "target_slot": 1, + "type": "INT" + }, + { + "id": 11, + "origin_id": 184, + "origin_slot": 0, + "target_id": 180, + "target_slot": 0, + "type": "NOISE" + }, + { + "id": 13, + "origin_id": 182, + "origin_slot": 0, + "target_id": 180, + "target_slot": 2, + "type": "SAMPLER" + }, + { + "id": 14, + "origin_id": 183, + "origin_slot": 0, + "target_id": 180, + "target_slot": 3, + "type": "SIGMAS" + }, + { + "id": 15, + "origin_id": 179, + "origin_slot": 0, + "target_id": 180, + "target_slot": 4, + "type": "LATENT" + }, + { + "id": 16, + "origin_id": 180, + "origin_slot": 0, + "target_id": 181, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 17, + "origin_id": 177, + "origin_slot": 0, + "target_id": 181, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 34, + "origin_id": 190, + "origin_slot": 1, + "target_id": 183, + "target_slot": 1, + "type": "INT" + }, + { + "id": 37, + "origin_id": 191, + "origin_slot": 1, + "target_id": 183, + "target_slot": 2, + "type": "INT" + }, + { + "id": 24, + "origin_id": 187, + "origin_slot": 0, + "target_id": 186, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 32, + "origin_id": 188, + "origin_slot": 0, + "target_id": 190, + "target_slot": 0, + "type": "INT" + }, + { + "id": 35, + "origin_id": 189, + "origin_slot": 0, + "target_id": 191, + "target_slot": 0, + "type": "INT" + }, + { + "id": 25, + "origin_id": 181, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 152, + "origin_id": -10, + "origin_slot": 0, + "target_id": 186, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 153, + "origin_id": -10, + "origin_slot": 1, + "target_id": 188, + "target_slot": 0, + "type": "INT" + }, + { + "id": 154, + "origin_id": -10, + "origin_slot": 2, + "target_id": 189, + "target_slot": 0, + "type": "INT" + }, + { + "id": 155, + "origin_id": -10, + "origin_slot": 3, + "target_id": 184, + "target_slot": 0, + "type": "INT" + }, + { + "id": 156, + "origin_id": -10, + 
"origin_slot": 4, + "target_id": 185, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 157, + "origin_id": -10, + "origin_slot": 5, + "target_id": 187, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 158, + "origin_id": -10, + "origin_slot": 6, + "target_id": 177, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 195, + "origin_id": 193, + "origin_slot": 0, + "target_id": 192, + "target_slot": 0, + "type": "STRING" + }, + { + "id": 196, + "origin_id": 196, + "origin_slot": 0, + "target_id": 193, + "target_slot": 0, + "type": "STRING" + }, + { + "id": 197, + "origin_id": 198, + "origin_slot": 0, + "target_id": 194, + "target_slot": 0, + "type": "STRING" + }, + { + "id": 199, + "origin_id": 195, + "origin_slot": 0, + "target_id": 196, + "target_slot": 0, + "type": "STRING" + }, + { + "id": 200, + "origin_id": 196, + "origin_slot": 0, + "target_id": 197, + "target_slot": 0, + "type": "STRING" + }, + { + "id": 201, + "origin_id": 196, + "origin_slot": 0, + "target_id": 198, + "target_slot": 0, + "type": "STRING" + }, + { + "id": 202, + "origin_id": 197, + "origin_slot": 0, + "target_id": 199, + "target_slot": 0, + "type": "STRING" + }, + { + "id": 207, + "origin_id": 199, + "origin_slot": 1, + "target_id": 183, + "target_slot": 0, + "type": "INT" + }, + { + "id": 208, + "origin_id": 192, + "origin_slot": 0, + "target_id": 183, + "target_slot": 3, + "type": "FLOAT" + }, + { + "id": 209, + "origin_id": 194, + "origin_slot": 0, + "target_id": 183, + "target_slot": 4, + "type": "FLOAT" + }, + { + "id": 211, + "origin_id": 200, + "origin_slot": 0, + "target_id": 201, + "target_slot": 2, + "type": "MODEL" + }, + { + "id": 213, + "origin_id": 186, + "origin_slot": 0, + "target_id": 201, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 214, + "origin_id": 178, + "origin_slot": 0, + "target_id": 201, + "target_slot": 3, + "type": "CONDITIONING" + }, + { + "id": 215, + "origin_id": 201, + "origin_slot": 0, + "target_id": 180, + "target_slot": 1, + 
"type": "GUIDER" + }, + { + "id": 216, + "origin_id": -10, + "origin_slot": 7, + "target_id": 200, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 218, + "origin_id": 202, + "origin_slot": 0, + "target_id": 195, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 219, + "origin_id": -10, + "origin_slot": 8, + "target_id": 202, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 222, + "origin_id": 185, + "origin_slot": 0, + "target_id": 203, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 223, + "origin_id": 203, + "origin_slot": 0, + "target_id": 201, + "target_slot": 0, + "type": "MODEL" + } + ], + "extra": {}, + "category": "Image generation and editing/Text to image", + "description": "This subgraph generates images using Ideogram v4, accepting plain text or structured JSON prompts for precise layout and style control. It suits detailed illustrations, concept art, or marketing visuals needing predictable composition and color palettes. The model uses flow-matching with asymmetric guidance, so no negative prompt is needed, but JSON prompts yield the best results." + } + ] + }, + "extra": { + "BlueprintDescription": "This subgraph generates images using Ideogram v4, accepting plain text or structured JSON prompts for precise layout and style control. It suits detailed illustrations, concept art, or marketing visuals needing predictable composition and color palettes. The model uses flow-matching with asymmetric guidance, so no negative prompt is needed, but JSON prompts yield the best results." 
+ } +} \ No newline at end of file diff --git a/blueprints/Video Depth Estimation (Depth Anything 3).json b/blueprints/Video Depth Estimation (Depth Anything 3).json new file mode 100644 index 000000000..88084a393 --- /dev/null +++ b/blueprints/Video Depth Estimation (Depth Anything 3).json @@ -0,0 +1,825 @@ +{ + "revision": 0, + "last_node_id": 97, + "last_link_id": 0, + "nodes": [ + { + "id": 97, + "type": "253ec5ca-8333-4ddf-a036-9fc0923651b9", + "pos": [ + 410, + 500 + ], + "size": [ + 400, + 400 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "name": "video", + "type": "VIDEO", + "link": null + }, + { + "name": "start_time", + "type": "FLOAT", + "widget": { + "name": "start_time" + }, + "link": null + }, + { + "name": "duration", + "type": "FLOAT", + "widget": { + "name": "duration" + }, + "link": null + }, + { + "name": "resolution", + "type": "INT", + "widget": { + "name": "resolution" + }, + "link": null + }, + { + "name": "resize_method", + "type": "COMBO", + "widget": { + "name": "resize_method" + }, + "link": null + }, + { + "label": "output_type", + "name": "output", + "type": "COMFY_DYNAMICCOMBO_V3", + "widget": { + "name": "output" + }, + "link": null + }, + { + "label": "normalization", + "name": "output.normalization", + "type": "COMBO", + "widget": { + "name": "output.normalization" + }, + "link": null + }, + { + "name": "output.apply_sky_clip", + "type": "BOOLEAN", + "widget": { + "name": "output.apply_sky_clip" + }, + "link": null + }, + { + "name": "model_name", + "type": "COMBO", + "widget": { + "name": "model_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + }, + { + "name": "audio", + "type": "AUDIO", + "links": [] + }, + { + "name": "fps", + "type": "FLOAT", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "96", + "start_time" + ], + [ + "96", + "duration" + ], + [ + "93", + "resolution" + ], + [ + "93", + 
"resize_method" + ], + [ + "92", + "output" + ], + [ + "92", + "output.normalization" + ], + [ + "92", + "output.apply_sky_clip" + ], + [ + "94", + "model_name" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [], + "title": "Video Depth Estimation (Depth Anything 3)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "253ec5ca-8333-4ddf-a036-9fc0923651b9", + "version": 1, + "state": { + "lastGroupId": 4, + "lastNodeId": 97, + "lastLinkId": 129, + "lastRerouteId": 0 + }, + "revision": 2, + "config": {}, + "name": "Video Depth Estimation (Depth Anything 3)", + "inputNode": { + "id": -10, + "bounding": [ + -230, + 130, + 167.912109375, + 228 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1520, + 140, + 128, + 108 + ] + }, + "inputs": [ + { + "id": "698c28c6-cf92-4039-8b39-f3062868ea7c", + "name": "video", + "type": "VIDEO", + "linkIds": [ + 119 + ], + "pos": [ + -86.087890625, + 154 + ] + }, + { + "id": "97a1f63e-1585-4a40-9dec-e2700120d84a", + "name": "start_time", + "type": "FLOAT", + "linkIds": [ + 121 + ], + "pos": [ + -86.087890625, + 174 + ] + }, + { + "id": "4dbbd3b3-c5ee-4a56-a0d3-3268d3b2fd64", + "name": "duration", + "type": "FLOAT", + "linkIds": [ + 122 + ], + "pos": [ + -86.087890625, + 194 + ] + }, + { + "id": "16f55101-f99d-4c0c-bebf-c3b31c54f13e", + "name": "resolution", + "type": "INT", + "linkIds": [ + 124 + ], + "pos": [ + -86.087890625, + 214 + ] + }, + { + "id": "d9cd7693-4bb3-4ed7-9a75-276b997abcd9", + "name": "resize_method", + "type": "COMBO", + "linkIds": [ + 125 + ], + "pos": [ + -86.087890625, + 234 + ] + }, + { + "id": "a6e90532-323b-462e-ba9c-1672384d5b31", + "name": "output", + "type": "COMFY_DYNAMICCOMBO_V3", + "linkIds": [ + 126 + ], + "label": "output_type", + "pos": [ + -86.087890625, + 254 + ] + }, + { + "id": "69e6aeef-437d-4fde-b2fc-d5ab9369238d", + "name": "output.normalization", + "type": "COMBO", + "linkIds": [ + 127 + ], + "label": "normalization", + 
"pos": [ + -86.087890625, + 274 + ] + }, + { + "id": "73206f72-f89a-4698-885e-5d9277df2998", + "name": "output.apply_sky_clip", + "type": "BOOLEAN", + "linkIds": [ + 128 + ], + "pos": [ + -86.087890625, + 294 + ] + }, + { + "id": "dddbc7fc-9431-448a-9ed3-9aa62404288b", + "name": "model_name", + "type": "COMBO", + "linkIds": [ + 129 + ], + "pos": [ + -86.087890625, + 314 + ] + } + ], + "outputs": [ + { + "id": "478ab537-63bc-4d74-a9f0-c975f550880f", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 7 + ], + "localized_name": "IMAGE", + "pos": [ + 1544, + 164 + ] + }, + { + "id": "cdaf037e-79bc-4a94-b06c-0fd32e76f615", + "name": "audio", + "type": "AUDIO", + "linkIds": [ + 112 + ], + "pos": [ + 1544, + 184 + ] + }, + { + "id": "4c0e5484-d193-49c7-b107-92619628880a", + "name": "fps", + "type": "FLOAT", + "linkIds": [ + 113 + ], + "pos": [ + 1544, + 204 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 92, + "type": "DA3Render", + "pos": [ + 740, + 230 + ], + "size": [ + 380, + 130 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "da3_geometry", + "name": "da3_geometry", + "type": "DA3_GEOMETRY", + "link": 12 + }, + { + "localized_name": "output", + "name": "output", + "type": "COMFY_DYNAMICCOMBO_V3", + "widget": { + "name": "output" + }, + "link": 126 + }, + { + "localized_name": "output.normalization", + "name": "output.normalization", + "type": "COMBO", + "widget": { + "name": "output.normalization" + }, + "link": 127 + }, + { + "localized_name": "output.apply_sky_clip", + "name": "output.apply_sky_clip", + "type": "BOOLEAN", + "widget": { + "name": "output.apply_sky_clip" + }, + "link": 128 + }, + { + "name": "geometry", + "type": "DA3_GEOMETRY", + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 7 + ] + } + ], + "properties": { + "Node name for S&R": "DA3Render", + "cnr_id": "comfy-core", + "ver": "0.19.0" + }, + 
"widgets_values": [ + "depth", + "v2_style", + false + ] + }, + { + "id": 93, + "type": "DA3Inference", + "pos": [ + 740, + -30 + ], + "size": [ + 390, + 130 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "da3_model", + "name": "da3_model", + "type": "DA3_MODEL", + "link": 107 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 111 + }, + { + "localized_name": "resolution", + "name": "resolution", + "type": "INT", + "widget": { + "name": "resolution" + }, + "link": 124 + }, + { + "localized_name": "resize_method", + "name": "resize_method", + "type": "COMBO", + "widget": { + "name": "resize_method" + }, + "link": 125 + }, + { + "localized_name": "mode", + "name": "mode", + "type": "COMFY_DYNAMICCOMBO_V3", + "widget": { + "name": "mode" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "da3_geometry", + "name": "da3_geometry", + "type": "DA3_GEOMETRY", + "slot_index": 0, + "links": [ + 12 + ] + } + ], + "properties": { + "Node name for S&R": "DA3Inference", + "cnr_id": "comfy-core", + "ver": "0.19.0" + }, + "widgets_values": [ + 504, + "lower_bound_resize", + "mono" + ] + }, + { + "id": 94, + "type": "LoadDA3Model", + "pos": [ + 50, + 410 + ], + "size": [ + 400, + 140 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "model_name", + "name": "model_name", + "type": "COMBO", + "widget": { + "name": "model_name" + }, + "link": 129 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "DA3_MODEL", + "name": "DA3_MODEL", + "type": "DA3_MODEL", + "links": [ + 107 + ] + } + ], + "properties": { + "Node name for S&R": "LoadDA3Model", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "models": [ + { + "name": "depth_anything_3_mono_large.safetensors", + "url": 
"https://huggingface.co/Comfy-Org/Depth-Anything-3/resolve/main/geometry_estimation/depth_anything_3_mono_large.safetensors", + "directory": "geometry_estimation" + } + ] + }, + "widgets_values": [ + "depth_anything_3_mono_large.safetensors", + "default" + ] + }, + { + "id": 95, + "type": "GetVideoComponents", + "pos": [ + 70, + -140 + ], + "size": [ + 260, + 120 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "video", + "name": "video", + "type": "VIDEO", + "link": 120 + } + ], + "outputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "links": [ + 111 + ] + }, + { + "localized_name": "audio", + "name": "audio", + "type": "AUDIO", + "links": [ + 112 + ] + }, + { + "localized_name": "fps", + "name": "fps", + "type": "FLOAT", + "links": [ + 113 + ] + }, + { + "localized_name": "bit_depth", + "name": "bit_depth", + "type": "INT", + "links": null + } + ], + "properties": { + "Node name for S&R": "GetVideoComponents", + "cnr_id": "comfy-core", + "ver": "0.24.0" + } + }, + { + "id": 96, + "type": "Video Slice", + "pos": [ + 70, + -360 + ], + "size": [ + 270, + 170 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "video", + "name": "video", + "type": "VIDEO", + "link": 119 + }, + { + "localized_name": "start_time", + "name": "start_time", + "type": "FLOAT", + "widget": { + "name": "start_time" + }, + "link": 121 + }, + { + "localized_name": "duration", + "name": "duration", + "type": "FLOAT", + "widget": { + "name": "duration" + }, + "link": 122 + }, + { + "localized_name": "strict_duration", + "name": "strict_duration", + "type": "BOOLEAN", + "widget": { + "name": "strict_duration" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "VIDEO", + "name": "VIDEO", + "type": "VIDEO", + "links": [ + 120 + ] + } + ], + "properties": { + "Node name for S&R": "Video Slice", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 0, + 5, + false 
+ ] + } + ], + "groups": [], + "links": [ + { + "id": 12, + "origin_id": 93, + "origin_slot": 0, + "target_id": 92, + "target_slot": 0, + "type": "DA3_GEOMETRY" + }, + { + "id": 7, + "origin_id": 92, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 107, + "origin_id": 94, + "origin_slot": 0, + "target_id": 93, + "target_slot": 0, + "type": "DA3_MODEL" + }, + { + "id": 111, + "origin_id": 95, + "origin_slot": 0, + "target_id": 93, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 112, + "origin_id": 95, + "origin_slot": 1, + "target_id": -20, + "target_slot": 1, + "type": "AUDIO" + }, + { + "id": 113, + "origin_id": 95, + "origin_slot": 2, + "target_id": -20, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 119, + "origin_id": -10, + "origin_slot": 0, + "target_id": 96, + "target_slot": 0, + "type": "VIDEO" + }, + { + "id": 120, + "origin_id": 96, + "origin_slot": 0, + "target_id": 95, + "target_slot": 0, + "type": "VIDEO" + }, + { + "id": 121, + "origin_id": -10, + "origin_slot": 1, + "target_id": 96, + "target_slot": 1, + "type": "FLOAT" + }, + { + "id": 122, + "origin_id": -10, + "origin_slot": 2, + "target_id": 96, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 124, + "origin_id": -10, + "origin_slot": 3, + "target_id": 93, + "target_slot": 2, + "type": "INT" + }, + { + "id": 125, + "origin_id": -10, + "origin_slot": 4, + "target_id": 93, + "target_slot": 3, + "type": "COMBO" + }, + { + "id": 126, + "origin_id": -10, + "origin_slot": 5, + "target_id": 92, + "target_slot": 1, + "type": "COMFY_DYNAMICCOMBO_V3" + }, + { + "id": 127, + "origin_id": -10, + "origin_slot": 6, + "target_id": 92, + "target_slot": 2, + "type": "COMBO" + }, + { + "id": 128, + "origin_id": -10, + "origin_slot": 7, + "target_id": 92, + "target_slot": 3, + "type": "BOOLEAN" + }, + { + "id": 129, + "origin_id": -10, + "origin_slot": 8, + "target_id": 94, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": {}, + "category": 
"Conditioning & Preprocessors/Depth", + "description": "This subgraph processes a video input through Depth Anything 3 to produce temporally consistent depth maps for each frame, outputting a depth video. It is ideal for video content requiring spatial geometry estimation, such as 3D reconstruction, SLAM, or novel view synthesis from moving cameras. The model uses a plain transformer backbone trained with a depth-ray representation, supporting any number of views without requiring known camera poses." + } + ] + }, + "extra": { + "BlueprintDescription": "This subgraph processes a video input through Depth Anything 3 to produce temporally consistent depth maps for each frame, outputting a depth video. It is ideal for video content requiring spatial geometry estimation, such as 3D reconstruction, SLAM, or novel view synthesis from moving cameras. The model uses a plain transformer backbone trained with a depth-ray representation, supporting any number of views without requiring known camera poses." 
+ } +} \ No newline at end of file diff --git a/blueprints/Video Edit (Bernini-R).json b/blueprints/Video Edit (Bernini-R).json new file mode 100644 index 000000000..2f09db5fa --- /dev/null +++ b/blueprints/Video Edit (Bernini-R).json @@ -0,0 +1,3732 @@ +{ + "revision": 0, + "last_node_id": 376, + "last_link_id": 0, + "nodes": [ + { + "id": 376, + "type": "70d8911e-8530-4a3d-9889-b39e8fbd131b", + "pos": [ + 4090, + 4890 + ], + "size": [ + 480, + 740 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "label": "source_video", + "localized_name": "video", + "name": "video", + "type": "VIDEO", + "link": null + }, + { + "name": "reference_video", + "shape": 7, + "type": "IMAGE", + "link": null + }, + { + "label": "reference_images", + "name": "reference_images.reference_image_0", + "shape": 7, + "type": "IMAGE", + "link": null + }, + { + "name": "value", + "type": "STRING", + "widget": { + "name": "value" + }, + "link": null + }, + { + "label": "task_type", + "name": "choice", + "type": "COMBO", + "widget": { + "name": "choice" + }, + "link": null + }, + { + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": null + }, + { + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": null + }, + { + "label": "noise_seed", + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": null + }, + { + "name": "length", + "type": "INT", + "widget": { + "name": "length" + }, + "link": null + }, + { + "name": "ref_max_size", + "shape": 7, + "type": "INT", + "widget": { + "name": "ref_max_size" + }, + "link": null + }, + { + "label": "turbo_mode", + "name": "value_1", + "type": "BOOLEAN", + "widget": { + "name": "value_1" + }, + "link": null + }, + { + "label": "high_noise_model", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "label": "low_noise_model", + "name": "unet_name_1", + "type": "COMBO", + "widget": { + 
"name": "unet_name_1" + }, + "link": null + }, + { + "label": "distill_lora", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": null + }, + { + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "VIDEO", + "name": "VIDEO", + "type": "VIDEO", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "371", + "value" + ], + [ + "356", + "choice" + ], + [ + "352", + "width" + ], + [ + "352", + "height" + ], + [ + "349", + "noise_seed" + ], + [ + "352", + "length" + ], + [ + "352", + "ref_max_size" + ], + [ + "368", + "value" + ], + [ + "344", + "unet_name" + ], + [ + "346", + "unet_name" + ], + [ + "345", + "lora_name" + ], + [ + "338", + "clip_name" + ], + [ + "339", + "vae_name" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [], + "title": "Video Edit (Bernini-R)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "70d8911e-8530-4a3d-9889-b39e8fbd131b", + "version": 1, + "state": { + "lastGroupId": 9, + "lastNodeId": 376, + "lastLinkId": 496, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Video Edit (Bernini-R)", + "inputNode": { + "id": -10, + "bounding": [ + 5240, + 5360, + 149.689453125, + 368 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 10004.984375, + 5000.9921875, + 128, + 68 + ] + }, + "inputs": [ + { + "id": "15c3bfa3-6844-473a-a927-a50284131356", + "name": "video", + "type": "VIDEO", + "linkIds": [ + 457 + ], + "localized_name": "video", + "label": "source_video", + "pos": [ + 5365.689453125, + 5384 + ] + }, + { + "id": "c02ac440-58ae-4415-bb24-dabb61b91f69", + "name": "reference_video", + "type": "IMAGE", + "linkIds": [ + 465 + ], + "pos": [ + 5365.689453125, + 5404 + ] + }, + { + "id": 
"c46d38f5-b0e8-4620-bd0e-4e86db7bdb1b", + "name": "reference_images.reference_image_0", + "type": "IMAGE", + "linkIds": [ + 466 + ], + "label": "reference_images", + "pos": [ + 5365.689453125, + 5424 + ] + }, + { + "id": "565fc711-6052-4c07-b638-403e01fcf7f8", + "name": "value", + "type": "STRING", + "linkIds": [ + 467 + ], + "pos": [ + 5365.689453125, + 5444 + ] + }, + { + "id": "bfcd6148-0bae-43b1-9440-a133fbc2663c", + "name": "choice", + "type": "COMBO", + "linkIds": [ + 468 + ], + "label": "task_type", + "pos": [ + 5365.689453125, + 5464 + ] + }, + { + "id": "28fedb03-828c-49d9-91ec-d3669a10c3b9", + "name": "width", + "type": "INT", + "linkIds": [ + 469 + ], + "pos": [ + 5365.689453125, + 5484 + ] + }, + { + "id": "69994b05-577e-486f-bd10-3360261d7bb8", + "name": "height", + "type": "INT", + "linkIds": [ + 470 + ], + "pos": [ + 5365.689453125, + 5504 + ] + }, + { + "id": "db300c09-5b92-41ad-990b-9dd8dad35f86", + "name": "noise_seed", + "type": "INT", + "linkIds": [ + 480 + ], + "label": "noise_seed", + "pos": [ + 5365.689453125, + 5524 + ] + }, + { + "id": "db664a31-39e5-4d6a-a5f1-3abac65b35d1", + "name": "length", + "type": "INT", + "linkIds": [ + 481 + ], + "pos": [ + 5365.689453125, + 5544 + ] + }, + { + "id": "26aa0c9e-9daa-4302-ab5c-5ac9141b9e20", + "name": "ref_max_size", + "type": "INT", + "linkIds": [ + 482 + ], + "pos": [ + 5365.689453125, + 5564 + ] + }, + { + "id": "ee527a92-8cc9-4b16-9858-9daab9ef2c45", + "name": "value_1", + "type": "BOOLEAN", + "linkIds": [ + 488 + ], + "label": "turbo_mode", + "pos": [ + 5365.689453125, + 5584 + ] + }, + { + "id": "8a6c9f3d-e24e-4d40-9c03-864bd4458376", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 489 + ], + "label": "high_noise_model", + "pos": [ + 5365.689453125, + 5604 + ] + }, + { + "id": "f35afd1c-c183-4d18-8672-314527728e9b", + "name": "unet_name_1", + "type": "COMBO", + "linkIds": [ + 490 + ], + "label": "low_noise_model", + "pos": [ + 5365.689453125, + 5624 + ] + }, + { + "id": 
"fff89f1d-615c-436e-b28a-3b7f915d0b05", + "name": "lora_name", + "type": "COMBO", + "linkIds": [ + 491, + 492 + ], + "label": "distill_lora", + "pos": [ + 5365.689453125, + 5644 + ] + }, + { + "id": "d76ff30d-c865-49b4-bccb-fb6e0a9b4f34", + "name": "clip_name", + "type": "COMBO", + "linkIds": [ + 493 + ], + "pos": [ + 5365.689453125, + 5664 + ] + }, + { + "id": "0850a515-4051-4de3-9343-7db929548ada", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 494 + ], + "pos": [ + 5365.689453125, + 5684 + ] + } + ], + "outputs": [ + { + "id": "7d994238-c919-43c6-9d97-340c9e383743", + "name": "VIDEO", + "type": "VIDEO", + "linkIds": [ + 458 + ], + "localized_name": "VIDEO", + "pos": [ + 10028.984375, + 5024.9921875 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 338, + "type": "CLIPLoader", + "pos": [ + 6170, + 5760 + ], + "size": [ + 670, + 170 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 493 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 409, + 438 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPLoader", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "models": [ + { + "name": "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "url": "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors?download=true", + "directory": "text_encoders" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + 
}, + "widgets_values": [ + "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "wan", + "default" + ] + }, + { + "id": 339, + "type": "VAELoader", + "pos": [ + 6170, + 5990 + ], + "size": [ + 670, + 110 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 494 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": [ + 414, + 436 + ] + } + ], + "properties": { + "Node name for S&R": "VAELoader", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "models": [ + { + "name": "Wan2_1_VAE_bf16.safetensors", + "url": "https://huggingface.co/Kijai/WanVideo_comfy/resolve/main/Wan2_1_VAE_bf16.safetensors?download=true", + "directory": "vae" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "Wan2_1_VAE_bf16.safetensors" + ] + }, + { + "id": 340, + "type": "LoraLoaderModelOnly", + "pos": [ + 6180, + 5550 + ], + "size": [ + 670, + 170 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 408 + }, + { + "localized_name": "lora_name", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": 492 + }, + { + "localized_name": "strength_model", + "name": "strength_model", + "type": "FLOAT", + "widget": { + "name": "strength_model" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 441 + ] + } + ], + "properties": { + "Node name for S&R": "LoraLoaderModelOnly", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "models": [ + { + "name": "lightx2v_T2V_14B_cfg_step_distill_v2_lora_rank64_bf16.safetensors", + "url": 
"https://huggingface.co/Kijai/WanVideo_comfy/resolve/main/Lightx2v/lightx2v_T2V_14B_cfg_step_distill_v2_lora_rank64_bf16.safetensors?download=true", + "directory": "loras" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "lightx2v_T2V_14B_cfg_step_distill_v2_lora_rank64_bf16.safetensors", + 1.5 + ], + "color": "#332922", + "bgcolor": "#593930" + }, + { + "id": 341, + "type": "CLIPTextEncode", + "pos": [ + 6950, + 5400 + ], + "size": [ + 700, + 240 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 409 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 435 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 342, + "type": "SplitSigmas", + "pos": [ + 8520, + 5180 + ], + "size": [ + 270, + 130 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "SIGMAS", + "link": 410 + }, + { + "localized_name": "step", + "name": "step", + "type": "INT", + "widget": { + "name": "step" + }, + "link": 411 + } + ], + "outputs": [ + { + "localized_name": "high_sigmas", + "name": "high_sigmas", + "type": "SIGMAS", + 
"links": [ + 422 + ] + }, + { + "localized_name": "low_sigmas", + "name": "low_sigmas", + "type": "SIGMAS", + "links": [ + 431 + ] + } + ], + "properties": { + "Node name for S&R": "SplitSigmas", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 3 + ] + }, + { + "id": 343, + "type": "KSamplerSelect", + "pos": [ + 8520, + 5370 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SAMPLER", + "name": "SAMPLER", + "type": "SAMPLER", + "links": [ + 421, + 430 + ] + } + ], + "properties": { + "Node name for S&R": "KSamplerSelect", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "res_multistep" + ] + }, + { + "id": 344, + "type": "UNETLoader", + "pos": [ + 6170, + 4930 + ], + "size": [ + 670, + 140 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 489 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 412, + 454 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "models": [ + { + "name": "wan2.2_bernini_r_high_noise_fp8_scaled.safetensors", + "url": 
"https://huggingface.co/Comfy-Org/Bernini-R/resolve/main/diffusion_models/wan2.2_bernini_r_high_noise_fp8_scaled.safetensors", + "directory": "diffusion_models" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "wan2.2_bernini_r_high_noise_fp8_scaled.safetensors", + "default" + ] + }, + { + "id": 345, + "type": "LoraLoaderModelOnly", + "pos": [ + 6170, + 5120 + ], + "size": [ + 670, + 170 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 412 + }, + { + "localized_name": "lora_name", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": 491 + }, + { + "localized_name": "strength_model", + "name": "strength_model", + "type": "FLOAT", + "widget": { + "name": "strength_model" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 455 + ] + } + ], + "properties": { + "Node name for S&R": "LoraLoaderModelOnly", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "models": [ + { + "name": "lightx2v_T2V_14B_cfg_step_distill_v2_lora_rank64_bf16.safetensors", + "url": "https://huggingface.co/Kijai/WanVideo_comfy/resolve/main/Lightx2v/lightx2v_T2V_14B_cfg_step_distill_v2_lora_rank64_bf16.safetensors?download=true", + "directory": "loras" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "lightx2v_T2V_14B_cfg_step_distill_v2_lora_rank64_bf16.safetensors", + 3 + ], + "color": "#332922", + "bgcolor": "#593930" + }, + { + "id": 346, + "type": "UNETLoader", + "pos": [ + 6170, + 5350 + ], + "size": [ + 670, + 140 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + 
"localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 490 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 408, + 425, + 440 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "models": [ + { + "name": "wan2.2_bernini_r_low_noise_fp8_scaled.safetensors", + "url": "https://huggingface.co/Comfy-Org/Bernini-R/resolve/main/diffusion_models/wan2.2_bernini_r_low_noise_fp8_scaled.safetensors", + "directory": "diffusion_models" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "wan2.2_bernini_r_low_noise_fp8_scaled.safetensors", + "default" + ] + }, + { + "id": 347, + "type": "VAEDecode", + "pos": [ + 9690, + 4950 + ], + "size": [ + 250, + 100 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 413 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 414 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 415 + ] + } + ], + "properties": { + "Node name for S&R": "VAEDecode", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 348, + "type": "CreateVideo", + "pos": [ + 9690, + 5120 + ], + "size": [ + 260, + 160 + ], + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + "localized_name": "images", + "name": "images", + 
"type": "IMAGE", + "link": 415 + }, + { + "localized_name": "audio", + "name": "audio", + "shape": 7, + "type": "AUDIO", + "link": 416 + }, + { + "localized_name": "fps", + "name": "fps", + "type": "FLOAT", + "widget": { + "name": "fps" + }, + "link": 417 + }, + { + "localized_name": "bit_depth", + "name": "bit_depth", + "shape": 7, + "type": "INT", + "widget": { + "name": "bit_depth" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "VIDEO", + "name": "VIDEO", + "type": "VIDEO", + "links": [ + 458 + ] + } + ], + "properties": { + "Node name for S&R": "CreateVideo", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 24, + 8 + ] + }, + { + "id": 349, + "type": "SamplerCustom", + "pos": [ + 8860, + 4960 + ], + "size": [ + 280, + 680 + ], + "flags": {}, + "order": 18, + "mode": 0, + "showAdvanced": false, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 418 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 419 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 420 + }, + { + "localized_name": "sampler", + "name": "sampler", + "type": "SAMPLER", + "link": 421 + }, + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "SIGMAS", + "link": 422 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 423 + }, + { + "localized_name": "add_noise", + "name": "add_noise", + "type": "BOOLEAN", + "widget": { + "name": "add_noise" + }, + "link": null + }, + { + "localized_name": "noise_seed", + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": 480 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": 
"cfg" + }, + "link": 424 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "LATENT", + "links": [ + 432 + ] + }, + { + "localized_name": "denoised_output", + "name": "denoised_output", + "type": "LATENT", + "links": [] + } + ], + "properties": { + "Node name for S&R": "SamplerCustom", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + true, + 852303761886160, + "randomize", + 1 + ] + }, + { + "id": 350, + "type": "BasicScheduler", + "pos": [ + 8520, + 4960 + ], + "size": [ + 270, + 170 + ], + "flags": {}, + "order": 19, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 425 + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": 426 + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SIGMAS", + "name": "SIGMAS", + "type": "SIGMAS", + "links": [ + 410 + ] + } + ], + "properties": { + "Node name for S&R": "BasicScheduler", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "simple", + 6, + 1 + ] + }, + { + "id": 351, + "type": "SamplerCustom", + "pos": [ + 9190, + 4950 + ], + "size": [ + 280, + 680 + ], + "flags": {}, + "order": 20, + "mode": 0, + "showAdvanced": false, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 427 + }, + 
{ + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 428 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 429 + }, + { + "localized_name": "sampler", + "name": "sampler", + "type": "SAMPLER", + "link": 430 + }, + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "SIGMAS", + "link": 431 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 432 + }, + { + "localized_name": "add_noise", + "name": "add_noise", + "type": "BOOLEAN", + "widget": { + "name": "add_noise" + }, + "link": null + }, + { + "localized_name": "noise_seed", + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": null + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": 433 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "LATENT", + "links": [ + 413 + ] + }, + { + "localized_name": "denoised_output", + "name": "denoised_output", + "type": "LATENT", + "links": [] + } + ], + "properties": { + "Node name for S&R": "SamplerCustom", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + false, + 0, + "fixed", + 1 + ] + }, + { + "id": 352, + "type": "BerniniConditioning", + "pos": [ + 7160, + 5720 + ], + "size": [ + 310, + 380 + ], + "flags": {}, + "order": 21, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 434 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 435 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 436 + }, + { + "localized_name": "source_video", + "name": "source_video", + 
"shape": 7, + "type": "IMAGE", + "link": 437 + }, + { + "localized_name": "reference_video", + "name": "reference_video", + "shape": 7, + "type": "IMAGE", + "link": 465 + }, + { + "label": "reference_image_0", + "localized_name": "reference_images.reference_image_0", + "name": "reference_images.reference_image_0", + "shape": 7, + "type": "IMAGE", + "link": 466 + }, + { + "label": "reference_image_1", + "localized_name": "reference_images.reference_image_1", + "name": "reference_images.reference_image_1", + "shape": 7, + "type": "IMAGE", + "link": null + }, + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 469 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 470 + }, + { + "localized_name": "length", + "name": "length", + "type": "INT", + "widget": { + "name": "length" + }, + "link": 481 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + }, + { + "localized_name": "ref_max_size", + "name": "ref_max_size", + "shape": 7, + "type": "INT", + "widget": { + "name": "ref_max_size" + }, + "link": 482 + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "links": [ + 419, + 428 + ] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "links": [ + 420, + 429 + ] + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 423 + ] + } + ], + "properties": { + "Node name for S&R": "BerniniConditioning", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 480, + 832, + 81, + 1, + 848 + ], + "color": "#322", + "bgcolor": "#533" + }, + { + 
"id": 353, + "type": "GetVideoComponents", + "pos": [ + 6170, + 6220 + ], + "size": [ + 230, + 150 + ], + "flags": {}, + "order": 22, + "mode": 0, + "inputs": [ + { + "localized_name": "video", + "name": "video", + "type": "VIDEO", + "link": 457 + } + ], + "outputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "links": [ + 437 + ] + }, + { + "localized_name": "audio", + "name": "audio", + "type": "AUDIO", + "links": [ + 416 + ] + }, + { + "localized_name": "fps", + "name": "fps", + "type": "FLOAT", + "links": [ + 417 + ] + }, + { + "localized_name": "bit_depth", + "name": "bit_depth", + "type": "INT", + "links": [] + } + ], + "properties": { + "Node name for S&R": "GetVideoComponents", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 354, + "type": "CLIPTextEncode", + "pos": [ + 6950, + 4940 + ], + "size": [ + 710, + 390 + ], + "flags": {}, + "order": 23, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 438 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 439 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 434 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode", + "cnr_id": "comfy-core", + "ver": "0.24.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 355, + "type": "ComfySwitchNode", + "pos": [ + 8140, + 5250 + ], + "size": [ + 270, + 130 + ], + "flags": {}, + "order": 24, + "mode": 0, + "inputs": [ + { + 
"localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 440 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 441 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 442 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 427 + ] + } + ], + "title": "Switch (Low Noise)", + "properties": { + "Node name for S&R": "ComfySwitchNode", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + false + ] + }, + { + "id": 356, + "type": "CustomCombo", + "pos": [ + 6170, + 3800 + ], + "size": [ + 460, + 600 + ], + "flags": {}, + "order": 25, + "mode": 0, + "inputs": [ + { + "localized_name": "choice", + "name": "choice", + "type": "COMBO", + "widget": { + "name": "choice" + }, + "link": 468 + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [] + }, + { + "localized_name": "INDEX", + "name": "INDEX", + "type": "INT", + "links": [ + 443 + ] + } + ], + "properties": { + "Node name for S&R": "CustomCombo", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + "Video Editing (Content Propagation)", + 7, + "Default", + "Text to Image", + "Text to Video", + "Image Editing", + "Subject to Image", + "Image to Video", + "Video Editing", + "Video Editing (Content Propagation)", + "Video Editing with Reference", + "Ads / Content Insertion", + "Video Editing (Action / Position)", + "Video Editing (Style / Motion)", + "" + ] + }, + { + "id": 357, + "type": "a98d3dcb-12b1-467c-94b8-723a89533c30", + "pos": [ + 6680, + 3800 + ], + "size": [ + 390, + 440 + ], + "flags": {}, + "order": 26, + "mode": 0, + "inputs": [ + { + "localized_name": "text_per_line", + "name": "text_per_line", + "type": "STRING", + "widget": { + "name": "text_per_line" + }, + "link": null + }, + { + "localized_name": "index", + "name": "index", 
+ "type": "INT", + "widget": { + "name": "index" + }, + "link": 443 + } + ], + "outputs": [ + { + "localized_name": "selected_line", + "name": "selected_line", + "type": "STRING", + "links": [ + 444 + ] + } + ], + "properties": { + "proxyWidgets": [ + [ + "373", + "string" + ], + [ + "374", + "value" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.19.0", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + } + }, + "widgets_values": [] + }, + { + "id": 358, + "type": "StringConcatenate", + "pos": [ + 6680, + 4500 + ], + "size": [ + 400, + 250 + ], + "flags": {}, + "order": 27, + "mode": 0, + "inputs": [ + { + "localized_name": "string_a", + "name": "string_a", + "type": "STRING", + "widget": { + "name": "string_a" + }, + "link": 444 + }, + { + "localized_name": "string_b", + "name": "string_b", + "type": "STRING", + "widget": { + "name": "string_b" + }, + "link": 459 + }, + { + "localized_name": "delimiter", + "name": "delimiter", + "type": "STRING", + "widget": { + "name": "delimiter" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 439 + ] + } + ], + "properties": { + "Node name for S&R": "StringConcatenate", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + "", + "", + "" + ] + }, + { + "id": 359, + "type": "PrimitiveInt", + "pos": [ + 7740, + 4970 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 445 + ] + } + ], + "title": "Int (Steps)", + "properties": { + "Node name for S&R": "PrimitiveInt", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 40, + "fixed" + ] + }, + { + "id": 360, + "type": "PrimitiveInt", + "pos": [ + 7750, + 5510 + ], + 
"size": [ + 270, + 110 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 446 + ] + } + ], + "title": "Int (Steps)", + "properties": { + "Node name for S&R": "PrimitiveInt", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 6, + "fixed" + ] + }, + { + "id": 361, + "type": "ComfySwitchNode", + "pos": [ + 8140, + 5440 + ], + "size": [ + 270, + 130 + ], + "flags": {}, + "order": 28, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 445 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 446 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 447 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 426 + ] + } + ], + "title": "Switch (Steps)", + "properties": { + "Node name for S&R": "ComfySwitchNode", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + false + ] + }, + { + "id": 362, + "type": "PrimitiveInt", + "pos": [ + 7740, + 5130 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 448 + ] + } + ], + "title": "Int (Split Steps)", + "properties": { + "Node name for S&R": "PrimitiveInt", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 20, + "fixed" + ] + }, + { + "id": 363, + "type": "PrimitiveInt", + "pos": [ + 7750, + 5680 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 4, 
+ "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 449 + ] + } + ], + "title": "Int (Split Steps)", + "properties": { + "Node name for S&R": "PrimitiveInt", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 3, + "fixed" + ] + }, + { + "id": 364, + "type": "ComfySwitchNode", + "pos": [ + 8140, + 5640 + ], + "size": [ + 270, + 130 + ], + "flags": {}, + "order": 29, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 448 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 449 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 450 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 411 + ] + } + ], + "title": "Switch (Low Steps)", + "properties": { + "Node name for S&R": "ComfySwitchNode", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + false + ] + }, + { + "id": 365, + "type": "ComfySwitchNode", + "pos": [ + 8140, + 5830 + ], + "size": [ + 270, + 130 + ], + "flags": {}, + "order": 30, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 451 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 452 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 453 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 424, + 433 + ] + } + ], + "title": "Switch (CFG)", + "properties": { + "Node name for S&R": "ComfySwitchNode", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 
false + ] + }, + { + "id": 366, + "type": "PrimitiveFloat", + "pos": [ + 7750, + 5840 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 452 + ] + } + ], + "title": "Float (CFG)", + "properties": { + "Node name for S&R": "PrimitiveFloat", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 367, + "type": "PrimitiveFloat", + "pos": [ + 7740, + 5290 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 451 + ] + } + ], + "title": "Float (CFG)", + "properties": { + "Node name for S&R": "PrimitiveFloat", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + 5 + ] + }, + { + "id": 368, + "type": "PrimitiveBoolean", + "pos": [ + 7750, + 6020 + ], + "size": [ + 270, + 100 + ], + "flags": {}, + "order": 31, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "BOOLEAN", + "widget": { + "name": "value" + }, + "link": 488 + } + ], + "outputs": [ + { + "localized_name": "BOOLEAN", + "name": "BOOLEAN", + "type": "BOOLEAN", + "links": [ + 442, + 447, + 450, + 453, + 456 + ] + } + ], + "title": "Boolean (Enable Turbo LoRA?)", + "properties": { + "Node name for S&R": "PrimitiveBoolean", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + true + ] + }, + { + "id": 369, + "type": "ComfySwitchNode", + "pos": [ + 8140, + 5060 + ], + "size": [ + 270, + 130 + ], + "flags": {}, + "order": 32, + "mode": 0, + "inputs": [ + { + 
"localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 454 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 455 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 456 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 418 + ] + } + ], + "title": "Switch (High Noise)", + "properties": { + "Node name for S&R": "ComfySwitchNode", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + false + ] + }, + { + "id": 370, + "type": "MarkdownNote", + "pos": [ + 7730, + 4680 + ], + "size": [ + 340, + 150 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "You can find the original settings here: https://github.com/bytedance/Bernini/blob/main/gradio_demo.py" + ], + "color": "#222", + "bgcolor": "#000" + }, + { + "id": 371, + "type": "PrimitiveStringMultiline", + "pos": [ + 6160, + 4510 + ], + "size": [ + 470, + 230 + ], + "flags": {}, + "order": 33, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "STRING", + "widget": { + "name": "value" + }, + "link": 467 + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 459 + ] + } + ], + "properties": { + "Node name for S&R": "PrimitiveStringMultiline", + "cnr_id": "comfy-core", + "ver": "0.24.0" + }, + "widgets_values": [ + "Replace the gray studio backdrop with a daytime urban street: brick buildings, shop windows, sidewalk, and soft overcast light. Keep the model's outfit, accessories, body pose, motion, and full-body framing unchanged. Only the environment behind the subject should change." 
+ ] + } + ], + "groups": [ + { + "id": 1, + "title": "Prompt & Conditioning", + "bounding": [ + 6930, + 4860, + 760, + 1270 + ], + "color": "#3f789e", + "flags": {} + }, + { + "id": 2, + "title": "Models", + "bounding": [ + 6150, + 4860, + 750, + 1270 + ], + "color": "#3f789e", + "flags": {} + }, + { + "id": 3, + "title": "Sampling", + "bounding": [ + 8490, + 4860, + 1160, + 1270 + ], + "color": "#3f789e", + "flags": {} + }, + { + "id": 8, + "title": "Prompt Construction", + "bounding": [ + 6150, + 3690, + 960, + 1120 + ], + "color": "#3f789e", + "flags": {} + }, + { + "id": 4, + "title": "System Prompt (Based on task type)", + "bounding": [ + 6160, + 3730, + 920, + 680 + ], + "color": "#3f789e", + "flags": {} + }, + { + "id": 7, + "title": "Switch Settings", + "bounding": [ + 7720, + 4860, + 740, + 1270 + ], + "color": "#3f789e", + "flags": {} + }, + { + "id": 5, + "title": "Distill LoRA settings", + "bounding": [ + 7730, + 5440, + 310, + 530 + ], + "color": "#3f789e", + "flags": {} + }, + { + "id": 6, + "title": "Original Settings", + "bounding": [ + 7730, + 4900, + 300, + 504 + ], + "color": "#3f789e", + "flags": {} + } + ], + "links": [ + { + "id": 408, + "origin_id": 346, + "origin_slot": 0, + "target_id": 340, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 409, + "origin_id": 338, + "origin_slot": 0, + "target_id": 341, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 410, + "origin_id": 350, + "origin_slot": 0, + "target_id": 342, + "target_slot": 0, + "type": "SIGMAS" + }, + { + "id": 411, + "origin_id": 364, + "origin_slot": 0, + "target_id": 342, + "target_slot": 1, + "type": "INT" + }, + { + "id": 412, + "origin_id": 344, + "origin_slot": 0, + "target_id": 345, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 413, + "origin_id": 351, + "origin_slot": 0, + "target_id": 347, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 414, + "origin_id": 339, + "origin_slot": 0, + "target_id": 347, + "target_slot": 1, + "type": "VAE" + }, + { 
+ "id": 415, + "origin_id": 347, + "origin_slot": 0, + "target_id": 348, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 416, + "origin_id": 353, + "origin_slot": 1, + "target_id": 348, + "target_slot": 1, + "type": "AUDIO" + }, + { + "id": 417, + "origin_id": 353, + "origin_slot": 2, + "target_id": 348, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 418, + "origin_id": 369, + "origin_slot": 0, + "target_id": 349, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 419, + "origin_id": 352, + "origin_slot": 0, + "target_id": 349, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 420, + "origin_id": 352, + "origin_slot": 1, + "target_id": 349, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 421, + "origin_id": 343, + "origin_slot": 0, + "target_id": 349, + "target_slot": 3, + "type": "SAMPLER" + }, + { + "id": 422, + "origin_id": 342, + "origin_slot": 0, + "target_id": 349, + "target_slot": 4, + "type": "SIGMAS" + }, + { + "id": 423, + "origin_id": 352, + "origin_slot": 2, + "target_id": 349, + "target_slot": 5, + "type": "LATENT" + }, + { + "id": 424, + "origin_id": 365, + "origin_slot": 0, + "target_id": 349, + "target_slot": 8, + "type": "FLOAT" + }, + { + "id": 425, + "origin_id": 346, + "origin_slot": 0, + "target_id": 350, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 426, + "origin_id": 361, + "origin_slot": 0, + "target_id": 350, + "target_slot": 2, + "type": "INT" + }, + { + "id": 427, + "origin_id": 355, + "origin_slot": 0, + "target_id": 351, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 428, + "origin_id": 352, + "origin_slot": 0, + "target_id": 351, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 429, + "origin_id": 352, + "origin_slot": 1, + "target_id": 351, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 430, + "origin_id": 343, + "origin_slot": 0, + "target_id": 351, + "target_slot": 3, + "type": "SAMPLER" + }, + { + "id": 431, + "origin_id": 342, + 
"origin_slot": 1, + "target_id": 351, + "target_slot": 4, + "type": "SIGMAS" + }, + { + "id": 432, + "origin_id": 349, + "origin_slot": 0, + "target_id": 351, + "target_slot": 5, + "type": "LATENT" + }, + { + "id": 433, + "origin_id": 365, + "origin_slot": 0, + "target_id": 351, + "target_slot": 8, + "type": "FLOAT" + }, + { + "id": 434, + "origin_id": 354, + "origin_slot": 0, + "target_id": 352, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 435, + "origin_id": 341, + "origin_slot": 0, + "target_id": 352, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 436, + "origin_id": 339, + "origin_slot": 0, + "target_id": 352, + "target_slot": 2, + "type": "VAE" + }, + { + "id": 437, + "origin_id": 353, + "origin_slot": 0, + "target_id": 352, + "target_slot": 3, + "type": "IMAGE" + }, + { + "id": 438, + "origin_id": 338, + "origin_slot": 0, + "target_id": 354, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 439, + "origin_id": 358, + "origin_slot": 0, + "target_id": 354, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 440, + "origin_id": 346, + "origin_slot": 0, + "target_id": 355, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 441, + "origin_id": 340, + "origin_slot": 0, + "target_id": 355, + "target_slot": 1, + "type": "MODEL" + }, + { + "id": 442, + "origin_id": 368, + "origin_slot": 0, + "target_id": 355, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 443, + "origin_id": 356, + "origin_slot": 1, + "target_id": 357, + "target_slot": 1, + "type": "INT" + }, + { + "id": 444, + "origin_id": 357, + "origin_slot": 0, + "target_id": 358, + "target_slot": 0, + "type": "STRING" + }, + { + "id": 459, + "origin_id": 371, + "origin_slot": 0, + "target_id": 358, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 445, + "origin_id": 359, + "origin_slot": 0, + "target_id": 361, + "target_slot": 0, + "type": "INT" + }, + { + "id": 446, + "origin_id": 360, + "origin_slot": 0, + "target_id": 361, + "target_slot": 1, + 
"type": "INT" + }, + { + "id": 447, + "origin_id": 368, + "origin_slot": 0, + "target_id": 361, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 448, + "origin_id": 362, + "origin_slot": 0, + "target_id": 364, + "target_slot": 0, + "type": "INT" + }, + { + "id": 449, + "origin_id": 363, + "origin_slot": 0, + "target_id": 364, + "target_slot": 1, + "type": "INT" + }, + { + "id": 450, + "origin_id": 368, + "origin_slot": 0, + "target_id": 364, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 451, + "origin_id": 367, + "origin_slot": 0, + "target_id": 365, + "target_slot": 0, + "type": "FLOAT" + }, + { + "id": 452, + "origin_id": 366, + "origin_slot": 0, + "target_id": 365, + "target_slot": 1, + "type": "FLOAT" + }, + { + "id": 453, + "origin_id": 368, + "origin_slot": 0, + "target_id": 365, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 454, + "origin_id": 344, + "origin_slot": 0, + "target_id": 369, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 455, + "origin_id": 345, + "origin_slot": 0, + "target_id": 369, + "target_slot": 1, + "type": "MODEL" + }, + { + "id": 456, + "origin_id": 368, + "origin_slot": 0, + "target_id": 369, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 457, + "origin_id": -10, + "origin_slot": 0, + "target_id": 353, + "target_slot": 0, + "type": "VIDEO" + }, + { + "id": 458, + "origin_id": 348, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "VIDEO" + }, + { + "id": 465, + "origin_id": -10, + "origin_slot": 1, + "target_id": 352, + "target_slot": 4, + "type": "IMAGE" + }, + { + "id": 466, + "origin_id": -10, + "origin_slot": 2, + "target_id": 352, + "target_slot": 5, + "type": "IMAGE" + }, + { + "id": 467, + "origin_id": -10, + "origin_slot": 3, + "target_id": 371, + "target_slot": 0, + "type": "STRING" + }, + { + "id": 468, + "origin_id": -10, + "origin_slot": 4, + "target_id": 356, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 469, + "origin_id": -10, + "origin_slot": 
5, + "target_id": 352, + "target_slot": 7, + "type": "INT" + }, + { + "id": 470, + "origin_id": -10, + "origin_slot": 6, + "target_id": 352, + "target_slot": 8, + "type": "INT" + }, + { + "id": 480, + "origin_id": -10, + "origin_slot": 7, + "target_id": 349, + "target_slot": 7, + "type": "INT" + }, + { + "id": 481, + "origin_id": -10, + "origin_slot": 8, + "target_id": 352, + "target_slot": 9, + "type": "INT" + }, + { + "id": 482, + "origin_id": -10, + "origin_slot": 9, + "target_id": 352, + "target_slot": 11, + "type": "INT" + }, + { + "id": 488, + "origin_id": -10, + "origin_slot": 10, + "target_id": 368, + "target_slot": 0, + "type": "BOOLEAN" + }, + { + "id": 489, + "origin_id": -10, + "origin_slot": 11, + "target_id": 344, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 490, + "origin_id": -10, + "origin_slot": 12, + "target_id": 346, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 491, + "origin_id": -10, + "origin_slot": 13, + "target_id": 345, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 492, + "origin_id": -10, + "origin_slot": 13, + "target_id": 340, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 493, + "origin_id": -10, + "origin_slot": 14, + "target_id": 338, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 494, + "origin_id": -10, + "origin_slot": 15, + "target_id": 339, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": {}, + "category": "Video generation and editing/Video Edit", + "description": "This subgraph uses Depth Anything 3 to predict spatially consistent geometry from any number of images or video frames, with or without known camera poses. It outputs depth maps, camera poses, and optionally 3D Gaussian parameters for novel view synthesis." 
+ }, + { + "id": "a98d3dcb-12b1-467c-94b8-723a89533c30", + "version": 1, + "state": { + "lastGroupId": 9, + "lastNodeId": 376, + "lastLinkId": 496, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Select Per-Line Text by Index", + "description": "Selects one line from multiline text by zero-based index for batch or list-driven prompt workflows.", + "inputNode": { + "id": -10, + "bounding": [ + -990, + 8595, + 128, + 88 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 710, + 8585, + 128, + 68 + ] + }, + "inputs": [ + { + "id": "75417d82-a934-4ac9-b667-d8dcd5a3bfb3", + "name": "text_per_line", + "type": "STRING", + "linkIds": [ + 13 + ], + "localized_name": "text_per_line", + "pos": [ + -886, + 8619 + ] + }, + { + "id": "46e69a73-1804-4ca6-9175-31445bf0be96", + "name": "index", + "type": "INT", + "linkIds": [ + 14 + ], + "localized_name": "index", + "pos": [ + -886, + 8639 + ] + } + ], + "outputs": [ + { + "id": "e34e8ad1-84d2-4bd2-a460-eb7de6067c10", + "name": "selected_line", + "type": "STRING", + "linkIds": [ + 10 + ], + "localized_name": "selected_line", + "pos": [ + 734, + 8609 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 372, + "type": "PreviewAny", + "pos": [ + -500, + 8400 + ], + "size": [ + 230, + 180 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "source", + "name": "source", + "type": "*", + "link": 1 + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 6 + ] + } + ], + "properties": { + "Node name for S&R": "PreviewAny", + "cnr_id": "comfy-core", + "ver": "0.19.0", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + null, + null, + null + ] + }, + { + "id": 373, + "type": "RegexExtract", + "pos": [ + -240, + 8740 + ], + "size": [ + 470, + 460 + ], + "flags": {}, + "order": 1, + "mode": 0, + "showAdvanced": false, + "inputs": [ + { + "localized_name": 
"string", + "name": "string", + "type": "STRING", + "widget": { + "name": "string" + }, + "link": 13 + }, + { + "localized_name": "regex_pattern", + "name": "regex_pattern", + "type": "STRING", + "widget": { + "name": "regex_pattern" + }, + "link": 9 + }, + { + "localized_name": "mode", + "name": "mode", + "type": "COMBO", + "widget": { + "name": "mode" + }, + "link": null + }, + { + "localized_name": "case_insensitive", + "name": "case_insensitive", + "type": "BOOLEAN", + "widget": { + "name": "case_insensitive" + }, + "link": null + }, + { + "localized_name": "multiline", + "name": "multiline", + "type": "BOOLEAN", + "widget": { + "name": "multiline" + }, + "link": null + }, + { + "localized_name": "dotall", + "name": "dotall", + "type": "BOOLEAN", + "widget": { + "name": "dotall" + }, + "link": null + }, + { + "localized_name": "group_index", + "name": "group_index", + "type": "INT", + "widget": { + "name": "group_index" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 10 + ] + } + ], + "properties": { + "Node name for S&R": "RegexExtract", + "cnr_id": "comfy-core", + "ver": "0.19.0", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "You are a helpful assistant.\nYou are a helpful assistant specialized in text-to-image generation.\nYou are a helpful assistant specialized in text-to-video generation.\nYou are a helpful assistant specialized in image editing.\nYou are a helpful assistant specialized in subject-to-image generation.\nYou are a helpful assistant specialized in image-to-video generation.\nYou are a helpful assistant specialized in video editing.\nYou are a helpful assistant specialized in video editing on content propagation.\nYou are a helpful assistant specialized in video editing with reference.\nYou are a helpful assistant specialized in ads insertion.\nYou are a helpful assistant for editing. 
You may need to adjust the subject's action or position.\nYou are a helpful assistant for editing. You might need to adjust the video's style, lighting, colors, textures, and the subject's pose or action.", + "", + "First Group", + false, + false, + false, + 1 + ] + }, + { + "id": 374, + "type": "PrimitiveInt", + "pos": [ + -810, + 8400 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 14 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 1 + ] + } + ], + "title": "Int (line index)", + "properties": { + "Node name for S&R": "Int (line index)", + "cnr_id": "comfy-core", + "ver": "0.19.0", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + 0, + "fixed" + ] + }, + { + "id": 375, + "type": "StringReplace", + "pos": [ + -240, + 8400 + ], + "size": [ + 400, + 280 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "string", + "name": "string", + "type": "STRING", + "widget": { + "name": "string" + }, + "link": null + }, + { + "localized_name": "find", + "name": "find", + "type": "STRING", + "widget": { + "name": "find" + }, + "link": null + }, + { + "localized_name": "replace", + "name": "replace", + "type": "STRING", + "widget": { + "name": "replace" + }, + "link": 6 + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 9 + ] + } + ], + "properties": { + "Node name for S&R": "StringReplace", + "cnr_id": "comfy-core", + "ver": "0.19.0", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "^(?:[^\\n]*\\n){index}([^\\n]*)(?:\\n|$)", + "index", + "" + ] + } + ], + "groups": [], + "links": [ + { + "id": 1, + "origin_id": 374, + "origin_slot": 0, + 
"target_id": 372, + "target_slot": 0, + "type": "INT" + }, + { + "id": 9, + "origin_id": 375, + "origin_slot": 0, + "target_id": 373, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 6, + "origin_id": 372, + "origin_slot": 0, + "target_id": 375, + "target_slot": 2, + "type": "STRING" + }, + { + "id": 10, + "origin_id": 373, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "STRING" + }, + { + "id": 13, + "origin_id": -10, + "origin_slot": 0, + "target_id": 373, + "target_slot": 0, + "type": "STRING" + }, + { + "id": 14, + "origin_id": -10, + "origin_slot": 1, + "target_id": 374, + "target_slot": 0, + "type": "INT" + } + ], + "extra": { + "ue_links": [], + "links_added_by_ue": [] + } + } + ] + }, + "extra": { + "BlueprintDescription": "This subgraph uses Depth Anything 3 to predict spatially consistent geometry from any number of images or video frames, with or without known camera poses. It outputs depth maps, camera poses, and optionally 3D Gaussian parameters for novel view synthesis." + } +} \ No newline at end of file From c7b246edc4401307172b22eb257896c1b5eb6a24 Mon Sep 17 00:00:00 2001 From: EXA4V Date: Wed, 17 Jun 2026 04:01:05 +0200 Subject: [PATCH 42/52] docs: add M3 and M4 to Apple Silicon supported chips list (#14449) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 61036bd81..06f0d7050 100644 --- a/README.md +++ b/README.md @@ -309,7 +309,7 @@ After this you should have everything installed and can proceed to running Comfy #### Apple Mac silicon -You can install ComfyUI in Apple Mac silicon (M1 or M2) with any recent macOS version. +You can install ComfyUI in Apple Mac silicon (M1, M2, M3 or M4) with any recent macOS version. 1. Install pytorch nightly. For instructions, read the [Accelerated PyTorch training on Mac](https://developer.apple.com/metal/pytorch/) Apple Developer guide (make sure to install the latest pytorch nightly). 1. 
Follow the [ComfyUI manual installation](#manual-install-windows-linux) instructions for Windows and Linux. From f026b01ba576d98442839861a0eb0046bc2250d3 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Tue, 16 Jun 2026 20:02:53 -0700 Subject: [PATCH 43/52] Update links to new comfyui desktop repo. (#14516) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 06f0d7050..c75353d36 100644 --- a/README.md +++ b/README.md @@ -140,7 +140,7 @@ ComfyUI follows a weekly release cycle targeting Monday but this regularly chang - Commits outside of the stable release tags may be very unstable and break many custom nodes. - Serves as the foundation for the desktop release -2. **[ComfyUI Desktop](https://github.com/Comfy-Org/desktop)** +2. **[ComfyUI Desktop](https://github.com/Comfy-Org/Comfy-Desktop)** - Builds a new release using the latest stable core version 3. **[ComfyUI Frontend](https://github.com/Comfy-Org/ComfyUI_frontend)** From d202707ff23e677dbed0272047ae9ff938652592 Mon Sep 17 00:00:00 2001 From: Alexis Rolland Date: Wed, 17 Jun 2026 21:02:45 +0800 Subject: [PATCH 44/52] Update TripoSplat categories (#14512) --- comfy_extras/nodes_triposplat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/comfy_extras/nodes_triposplat.py b/comfy_extras/nodes_triposplat.py index 1848ad31a..7bf4703fe 100644 --- a/comfy_extras/nodes_triposplat.py +++ b/comfy_extras/nodes_triposplat.py @@ -65,7 +65,7 @@ class TripoSplatPreprocessImage(IO.ComfyNode): return IO.Schema( node_id="TripoSplatPreprocessImage", display_name="TripoSplat Preprocess Image", - category="3d/conditioning", + category="model/conditioning/triposplat", description="Crop center each image to a square canvas on a black background and add padding.", inputs=[ IO.Image.Input("image"), @@ -95,7 +95,7 @@ class TripoSplatConditioning(IO.ComfyNode): return IO.Schema( 
node_id="TripoSplatConditioning", display_name="TripoSplat Conditioning", - category="3d/conditioning", + category="model/conditioning/triposplat", description="Encode the image with DINOv3 and the Flux2 VAE into TripoSplat positive/negative " "conditioning, and create the fixed size noise target (latent + camera) for the KSampler", inputs=[ From a590d60bb1d7d47c1cdb49fc8116b0e919fc4bd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Sepp=C3=A4nen?= <40791699+kijai@users.noreply.github.com> Date: Wed, 17 Jun 2026 16:21:23 +0300 Subject: [PATCH 45/52] feat: SCAIL-2 multireference (CORE-310) (#14509) * SCAIl-2: support multiref --- comfy/ldm/wan/model.py | 19 +++---- comfy/model_base.py | 20 ++++--- comfy_extras/nodes_scail.py | 110 ++++++++++++++++++++++-------------- 3 files changed, 91 insertions(+), 58 deletions(-) diff --git a/comfy/ldm/wan/model.py b/comfy/ldm/wan/model.py index 282408891..1c9782a38 100644 --- a/comfy/ldm/wan/model.py +++ b/comfy/ldm/wan/model.py @@ -1665,7 +1665,7 @@ class SCAILWanModel(WanModel): # embeddings x = self.patch_embedding(x.float()).to(x.dtype) - if ref_mask_latents is not None: # SCAIL-2 additive mask stream + if ref_mask_latents is not None: # SCAIL-2 additive mask stream (one identity mask frame per reference, then video) x = x + self.patch_embedding_mask(ref_mask_latents.float()).to(x.dtype) grid_sizes = x.shape[2:] transformer_options["grid_sizes"] = grid_sizes @@ -1728,22 +1728,25 @@ class SCAILWanModel(WanModel): # ref_mask_flag is a scalar bool (CONDConstant, SCAIL-2 only). False => replacement mode, # which places ref/pose via H/W rope shifts instead of the animation-mode temporal offset. + # reference_latent may stack several frames: the last is the primary reference adjacent to the video, the earlier frames are additional references. 
def rope_encode(self, t, h, w, t_start=0, steps_t=None, steps_h=None, steps_w=None, device=None, dtype=None, pose_latents=None, reference_latent=None, ref_mask_flag=None, transformer_options={}): + ref_t_patches = 0 + if reference_latent is not None: + ref_t_patches = (reference_latent.shape[2] + (self.patch_size[0] // 2)) // self.patch_size[0] + if ref_mask_flag is not None and not bool(ref_mask_flag): REF_ROPE_H = 120.0 POSE_ROPE_W = 120.0 - ref_t_patches = 0 - if reference_latent is not None: - ref_t_patches = (reference_latent.shape[2] + (self.patch_size[0] // 2)) // self.patch_size[0] main_t_patches = t - ref_t_patches + video_t_start = max(ref_t_patches - 1, 0) parts = [] if ref_t_patches > 0: ref_tf = {"rope_options": {"shift_y": REF_ROPE_H, "shift_x": 0.0, "scale_y": 1.0, "scale_x": 1.0}} parts.append(super().rope_encode(ref_t_patches, h, w, t_start=0, device=device, dtype=dtype, transformer_options=ref_tf)) if main_t_patches > 0: - parts.append(super().rope_encode(main_t_patches, h, w, t_start=0, device=device, dtype=dtype, transformer_options=transformer_options)) + parts.append(super().rope_encode(main_t_patches, h, w, t_start=video_t_start, device=device, dtype=dtype, transformer_options=transformer_options)) if pose_latents is not None: F_pose, H_pose, W_pose = pose_latents.shape[-3], pose_latents.shape[-2], pose_latents.shape[-1] @@ -1752,7 +1755,7 @@ class SCAILWanModel(WanModel): h_shift = (h_scale - 1) / 2 w_shift = (w_scale - 1) / 2 pose_tf = {"rope_options": {"shift_y": h_shift, "shift_x": POSE_ROPE_W + w_shift, "scale_y": h_scale, "scale_x": w_scale}} - parts.append(super().rope_encode(F_pose, H_pose, W_pose, t_start=0, device=device, dtype=dtype, transformer_options=pose_tf)) + parts.append(super().rope_encode(F_pose, H_pose, W_pose, t_start=video_t_start, device=device, dtype=dtype, transformer_options=pose_tf)) return torch.cat(parts, dim=1) @@ -1761,10 +1764,6 @@ class SCAILWanModel(WanModel): if pose_latents is None: return main_freqs - 
ref_t_patches = 0 - if reference_latent is not None: - ref_t_patches = (reference_latent.shape[2] + (self.patch_size[0] // 2)) // self.patch_size[0] - F_pose, H_pose, W_pose = pose_latents.shape[-3], pose_latents.shape[-2], pose_latents.shape[-1] # if pose is at half resolution, scale_y/scale_x=2 stretches the position range to cover the same RoPE extent as the main frames diff --git a/comfy/model_base.py b/comfy/model_base.py index ab4a11022..d143dc06f 100644 --- a/comfy/model_base.py +++ b/comfy/model_base.py @@ -1747,10 +1747,14 @@ class WAN21_SCAIL(WAN21): reference_latents = kwargs.get("reference_latents", None) if reference_latents is not None: - ref_latent = self.process_latent_in(reference_latents[-1]) - ref_mask = torch.ones_like(ref_latent[:, :4]) - ref_latent = torch.cat([ref_latent, ref_mask], dim=1) - out['reference_latent'] = comfy.conds.CONDRegular(ref_latent) + # SCAIL-2 multi-reference: reference_latents[0] is the primary ref, [1:] are additional + # references. Stack as [additional..., primary] so the primary stays adjacent to the video. + ordered = list(reference_latents[1:]) + list(reference_latents[:1]) + stacked = [] + for lat in ordered: + lat = self.process_latent_in(lat) + stacked.append(torch.cat([lat, torch.ones_like(lat[:, :4])], dim=1)) + out['reference_latent'] = comfy.conds.CONDRegular(torch.cat(stacked, dim=2)) pose_latents = kwargs.get("pose_video_latent", None) if pose_latents is not None: @@ -1792,6 +1796,7 @@ class WAN21_SCAIL2(WAN21_SCAIL): if driving_mask_28ch is not None: out['sam_latents'] = comfy.conds.CONDRegular(driving_mask_28ch.movedim(1, 2).contiguous()) + # ref_mask_28ch holds one identity mask per stacked reference frame (additional refs first, then the primary ref), followed by zeros over the video frames. 
ref_mask_28ch = kwargs.get("ref_mask_28ch", None) if ref_mask_28ch is not None: out['ref_mask_latents'] = comfy.conds.CONDRegular(ref_mask_28ch.movedim(1, 2).contiguous()) @@ -1819,10 +1824,11 @@ class WAN21_SCAIL2(WAN21_SCAIL): # Return sliced view omitting retain_index_list return comfy.context_windows.slice_cond(cond_value, window, x_in, device, temporal_dim=2, temporal_offset=0) if cond_key == "ref_mask_latents" and hasattr(cond_value, "cond") and isinstance(cond_value.cond, torch.Tensor): - # The ref mask is just a single frame padded with frames of zeros, so just grab the first frames for all windows + # The ref mask is N leading ref frames padded with frames of zeros, so just grab the first frames for all windows full_ref_mask = cond_value.cond video_frame_count = x_in.shape[2] - if full_ref_mask.shape[2] != video_frame_count + 1: + ref_frame_count = full_ref_mask.shape[2] - video_frame_count + if ref_frame_count < 1: return None window_length = len(window.index_list) @@ -1831,7 +1837,7 @@ class WAN21_SCAIL2(WAN21_SCAIL): if anchor_index is not None and anchor_index >= 0: window_length += 1 - window_ref_mask = full_ref_mask[:, :, :window_length + 1].to(device) + window_ref_mask = full_ref_mask[:, :, :window_length + ref_frame_count].to(device) return cond_value._copy_with(window_ref_mask) return super().resize_cond_for_context_window(cond_key, cond_value, window, x_in, device, retain_index_list=retain_index_list) diff --git a/comfy_extras/nodes_scail.py b/comfy_extras/nodes_scail.py index 007733efc..55c9897e3 100644 --- a/comfy_extras/nodes_scail.py +++ b/comfy_extras/nodes_scail.py @@ -34,14 +34,20 @@ def _unpack(track_data): return unpack_masks(packed) -def _first_frame_cx_area(masks_bool): - first = masks_bool[0].float() - H, W = first.shape[-2], first.shape[-1] - n_pixels = H * W - grid_x = torch.arange(W, device=first.device, dtype=first.dtype).view(1, W) - area = first.sum(dim=(-1, -2)).clamp_(min=1) - cx = (first * grid_x).sum(dim=(-1, -2)) / area - 
return (cx / W).tolist(), (area / n_pixels).tolist() +def _first_appearance_cx_area(masks_bool): + """Per object: first frame it appears in, plus centroid-x and area in that frame.""" + m = masks_bool.float() + T, H, W = m.shape[0], m.shape[-2], m.shape[-1] + grid_x = torch.arange(W, device=m.device, dtype=m.dtype).view(1, 1, 1, W) + area_t = m.sum(dim=(-1, -2)) + cx_t = (m * grid_x).sum(dim=(-1, -2)) / area_t.clamp(min=1) + present = area_t > 0 + frame_idx = torch.arange(T, device=m.device).unsqueeze(1) + first_t = torch.where(present, frame_idx, T).amin(dim=0) + sel = first_t.clamp(max=T - 1).unsqueeze(0) + cx = cx_t.gather(0, sel).squeeze(0) + area = area_t.gather(0, sel).squeeze(0) + return first_t.tolist(), (cx / W).tolist(), (area / (H * W)).tolist() def _subset_track_data(track_data, obj_indices): @@ -81,12 +87,26 @@ def _render_colored_masks(track_data, background="black"): masks_full.view(T * N_obj, 1, Hm, Wm), size=(H, W), mode="nearest" ).view(T, N_obj, H, W) > 0.5 any_mask = masks_full.any(dim=1) - obj_idx_map = masks_full.to(torch.uint8).argmax(dim=1) - color_overlay = colors[obj_idx_map] + color_overlay = colors[masks_full.to(torch.uint8).argmax(dim=1)] bg_tensor = torch.tensor(bg_rgb, device=device, dtype=color_overlay.dtype).view(1, 1, 1, 3) return torch.where(any_mask.unsqueeze(-1), color_overlay, bg_tensor.expand_as(color_overlay)) +def _render_mask_as_identity(mask, background="black"): + """Plain comfy MASK (B,H,W) or (H,W) -> (B,H,W,3) rendered as a single identity (palette[0]) + on the given background. 
A batch is treated as multiple views of that one subject.""" + device = comfy.model_management.intermediate_device() + dtype = comfy.model_management.intermediate_dtype() + if mask.ndim == 2: + mask = mask.unsqueeze(0) + mask = mask.to(device=device, dtype=dtype) + B, H, W = mask.shape + bg_rgb = (1.0, 1.0, 1.0) if background.startswith("white") else (0.0, 0.0, 0.0) + color = torch.tensor(DEFAULT_PALETTE[0], device=device, dtype=dtype).view(1, 1, 1, 3) + bg = torch.tensor(bg_rgb, device=device, dtype=dtype).view(1, 1, 1, 3) + return torch.where((mask > 0.5).unsqueeze(-1), color.expand(B, H, W, 3), bg.expand(B, H, W, 3)) + + def _extract_mask_to_28ch(rgb_video): """Colored RGB mask (T, H, W, 3) in [0, 1] -> SCAIL-2 28-channel binary latent (1, T_lat, 28, H_lat, W_lat). 7 per-color binary channels (white/r/g/b/y/m/c) @@ -138,8 +158,8 @@ class WanSCAILToVideo(io.ComfyNode): io.Float.Input("pose_strength", default=1.0, min=0.0, max=10.0, step=0.01, tooltip="Strength of the pose latent."), io.Float.Input("pose_start", default=0.0, min=0.0, max=1.0, step=0.01, tooltip="Start step of the pose conditioning."), io.Float.Input("pose_end", default=1.0, min=0.0, max=1.0, step=0.01, tooltip="End step of the pose conditioning."), - io.Image.Input("reference_image", optional=True, tooltip="Reference image, for multiple references composite all on single image."), - io.Image.Input("reference_image_mask", optional=True, tooltip="SCAIL-2 only. Colored reference mask at the same resolution as reference_image."), + io.Image.Input("reference_image", optional=True, tooltip="Reference image. The first image is the primary reference (composite all identities onto it). SCAIL-2: extra batch images are used as additional views (back view, close-up, occluded background), each needing a matching reference_image_mask in that identity's color."), + io.Image.Input("reference_image_mask", optional=True, tooltip="SCAIL-2 only. 
Colored reference mask, batch matching reference_image (first = primary reference mask, rest = identity masks for the additional reference_image)."), io.ClipVisionOutput.Input("clip_vision_output", optional=True, tooltip="CLIP vision features for conditioning. Model is trained with stretch resize to aspect ratio."), io.Int.Input("video_frame_offset", default=0, min=0, max=nodes.MAX_RESOLUTION, step=1, tooltip="Cumulative output frame this chunk begins at. Wire from the previous chunk's video_frame_offset output."), io.Int.Input("previous_frame_count", default=5, min=1, max=nodes.MAX_RESOLUTION, step=4, tooltip="Tail frames of previous_frames to anchor. SCAIL-2 trained at 5 (81-frame chunks, 76-frame step)."), @@ -171,19 +191,21 @@ class WanSCAILToVideo(io.ComfyNode): video_frame_offset -= prev_trimmed.shape[0] video_frame_offset = max(0, video_frame_offset) - ref_latent = None if reference_image is not None: - reference_image = comfy.utils.common_upscale(reference_image[:1].movedim(-1, 1), width, height, "bicubic", "center").movedim(1, -1) - # Replacement Mode: composite ref on black bg using reference_image_mask as alpha matte - if replacement_mode and reference_image_mask is not None: - rm = comfy.utils.common_upscale(reference_image_mask[:1].movedim(-1, 1), width, height, "nearest-exact", "center").movedim(1, -1) - is_char = (rm[..., :3].max(dim=-1, keepdim=True).values > 0.1).to(reference_image.dtype) - reference_image = reference_image * is_char - ref_latent = vae.encode(reference_image[:, :, :, :3]) + ref_imgs = comfy.utils.common_upscale(reference_image.movedim(-1, 1), width, height, "bicubic", "center").movedim(1, -1) + n_ref = ref_imgs.shape[0] + # SCAIL-2 multi-reference: the first image is the primary ref, the rest are additional references. 
- if ref_latent is not None: - positive = node_helpers.conditioning_set_values(positive, {"reference_latents": [ref_latent]}, append=True) - negative = node_helpers.conditioning_set_values(negative, {"reference_latents": [ref_latent]}, append=True) + # Replacement Mode: composite each ref on black bg using its mask as alpha matte + if replacement_mode and reference_image_mask is not None: + rm = comfy.utils.common_upscale(reference_image_mask.movedim(-1, 1), width, height, "nearest-exact", "center").movedim(1, -1) + rm = rm[[min(i, rm.shape[0] - 1) for i in range(n_ref)]] + is_char = (rm[..., :3].max(dim=-1, keepdim=True).values > 0.1).to(ref_imgs.dtype) + ref_imgs = ref_imgs * is_char + # encode each ref individually so each stays a single latent frame (a batched encode would be treated as a video) + ref_latents = [vae.encode(ref_imgs[i:i + 1, :, :, :3]) for i in range(n_ref)] + positive = node_helpers.conditioning_set_values(positive, {"reference_latents": ref_latents}, append=True) + negative = node_helpers.conditioning_set_values(negative, {"reference_latents": ref_latents}, append=True) if clip_vision_output is not None: positive = node_helpers.conditioning_set_values(positive, {"clip_vision_output": clip_vision_output}) @@ -221,11 +243,16 @@ class WanSCAILToVideo(io.ComfyNode): positive = node_helpers.conditioning_set_values(positive, {"driving_mask_28ch": driving_mask_28ch}) negative = node_helpers.conditioning_set_values(negative, {"driving_mask_28ch": driving_mask_28ch}) - if reference_image_mask is not None: - ref_mask_hw = comfy.utils.common_upscale(reference_image_mask[:1].movedim(-1, 1), width, height, "bicubic", "center").movedim(1, -1) - ref_mask_1f = _extract_mask_to_28ch(ref_mask_hw) + # The ref mask binds reference frames to identities, so it only applies when there's a reference image. 
+ if reference_image_mask is not None and reference_image is not None: + ref_mask_hw = comfy.utils.common_upscale(reference_image_mask.movedim(-1, 1), width, height, "nearest-exact", "center").movedim(1, -1) + n_masks = ref_mask_hw.shape[0] + n_ref = reference_image.shape[0] + + add_masks = [_extract_mask_to_28ch(ref_mask_hw[min(i, n_masks - 1)][None]) for i in range(1, n_ref)] + ref_mask_1f = _extract_mask_to_28ch(ref_mask_hw[:1]) zeros = torch.zeros((1, latent.shape[2], 28, ref_mask_1f.shape[-2], ref_mask_1f.shape[-1]), device=ref_mask_1f.device, dtype=ref_mask_1f.dtype) - ref_mask_28ch = torch.cat([ref_mask_1f, zeros], dim=1) + ref_mask_28ch = torch.cat(add_masks + [ref_mask_1f, zeros], dim=1) positive = node_helpers.conditioning_set_values(positive, {"ref_mask_28ch": ref_mask_28ch}) negative = node_helpers.conditioning_set_values(negative, {"ref_mask_28ch": ref_mask_28ch}) @@ -244,12 +271,9 @@ class WanSCAILToVideo(io.ComfyNode): class SCAIL2ColoredMask(io.ComfyNode): - """Render SAM3 tracks for the driving pose video and (optionally) the reference - image into the two colored masks WanSCAILToVideo consumes. Shared `sort_by` - across both outputs guarantees identity K maps to the same color on both - sides, for multi-person workflow consistency. - reference_image_mask is always rendered black-bg (model convention) - pose_video_mask bg follows replacement_mode: black = Animation Mode, white = Replacement Mode + """Render SAM3 tracks for the driving pose video and reference image(s) into the + colored masks WanSCAILToVideo consumes. Shared `sort_by` keeps each identity on the + same color across both outputs. """ @classmethod @@ -260,10 +284,12 @@ class SCAIL2ColoredMask(io.ComfyNode): category="model/conditioning/wan/scail", inputs=[ SAM3TrackData.Input("driving_track_data", tooltip="SAM3 track of the driving pose video. 
Will be rendered into the pose_video_mask output."), - SAM3TrackData.Input("ref_track_data", optional=True, tooltip="SAM3 track of the reference image."), - io.String.Input("object_indices", default="", tooltip="Comma-separated list of person indices to include (e.g. '0,2,3'). Applied to both reference and pose video masks. Empty = all."), + io.MultiType.Input("ref_track_data", [SAM3TrackData, io.Mask], optional=True, display_name="reference_masks", + tooltip="SAM3 track of the reference image(s) (one identity per object, colored in batch order), or a plain MASK of the reference subject (rendered as a single identity)."), + io.String.Input("object_indices", default="", + tooltip="Comma-separated list of person indices to include (e.g. '0,2,3'). Applied to both reference and pose video masks. Empty = all."), io.Combo.Input("sort_by", options=["none", "left_to_right", "area"], default="left_to_right", - tooltip="Order in which palette colors are assigned to the tracked objects (applied to both reference and pose video so each identity keeps the same color). left_to_right = leftmost object (by first-frame centroid) gets the first color; area = biggest object (by first-frame mask area) gets the first color; none = keep SAM3's order."), + tooltip="Order in which palette colors are assigned to the tracked objects (applied to both reference and pose video so each identity keeps the same color). Objects that appear in earlier frames always come first; within a frame, left_to_right = leftmost object (by centroid at first appearance) gets the first color, area = biggest object (by mask area at first appearance) gets the first color; none = keep SAM3's order."), io.Boolean.Input("replacement_mode", default=False, tooltip="False = Animation Mode (pose_video_mask has black background, reference_image_mask has white background). 
" "True = Replacement Mode (pose_video_mask has white background, reference_image_mask has black background)."), @@ -280,11 +306,11 @@ class SCAIL2ColoredMask(io.ComfyNode): def _prep(td): masks_bool = _unpack(td) if sort_by != "none" and masks_bool is not None: - cx, area = _first_frame_cx_area(masks_bool) + first_t, cx, area = _first_appearance_cx_area(masks_bool) if sort_by == "left_to_right": - order = sorted(range(len(cx)), key=lambda i: cx[i]) + order = sorted(range(len(cx)), key=lambda i: (first_t[i], cx[i])) else: # "area" - order = sorted(range(len(area)), key=lambda i: -area[i]) + order = sorted(range(len(area)), key=lambda i: (first_t[i], -area[i])) td = _subset_track_data(td, order) if object_indices.strip(): indices = [int(i.strip()) for i in object_indices.split(",") if i.strip().isdigit()] @@ -300,8 +326,10 @@ class SCAIL2ColoredMask(io.ComfyNode): ref_bg = "black" if replacement_mode else "white" if ref_track_data is not None: - ref = _prep(ref_track_data) - reference_image_mask = _render_colored_masks(ref, ref_bg) + if isinstance(ref_track_data, torch.Tensor): # plain comfy MASK + reference_image_mask = _render_mask_as_identity(ref_track_data, ref_bg) + else: + reference_image_mask = _render_colored_masks(_prep(ref_track_data), ref_bg) else: H, W = drv["orig_size"] fill_value = 1.0 if ref_bg == "white" else 0.0 From ca3dbe206c2fea84f2af4371ca13e9f2bfeb82e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Sepp=C3=A4nen?= <40791699+kijai@users.noreply.github.com> Date: Wed, 17 Jun 2026 18:45:06 +0300 Subject: [PATCH 46/52] Allow using Qwen3-VL as flux2 klein text encoder (again) (#14526) --- comfy/sd.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/comfy/sd.py b/comfy/sd.py index 688e6db90..348fe4958 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -1622,6 +1622,10 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip clip_data[0] = comfy.utils.state_dict_prefix_replace(clip_data[0], {"model.language_model.": 
"model.", "model.visual.": "visual.", "lm_head.": "model.lm_head."}) clip_target.clip = comfy.text_encoders.ideogram4.te_qwen3vl(**llama_detect(clip_data)) clip_target.tokenizer = comfy.text_encoders.ideogram4.Ideogram4Qwen3VLTokenizer + elif clip_type in (CLIPType.FLUX, CLIPType.FLUX2): # Flux2 Klein reuses the Qwen3-VL LM (3-layer tap -> 12288); visual unused. + klein_model_type = "qwen3_8b" if te_model == TEModel.QWEN3VL_8B else "qwen3_4b" + clip_target.clip = comfy.text_encoders.flux.klein_te(**llama_detect(clip_data), model_type=klein_model_type) + clip_target.tokenizer = comfy.text_encoders.flux.KleinTokenizer8B if te_model == TEModel.QWEN3VL_8B else comfy.text_encoders.flux.KleinTokenizer else: clip_data[0] = comfy.utils.state_dict_prefix_replace(clip_data[0], {"model.language_model.": "model.", "model.visual.": "visual.", "lm_head.": "model.lm_head."}) qwen3vl_type = {TEModel.QWEN3VL_4B: "qwen3vl_4b", TEModel.QWEN3VL_8B: "qwen3vl_8b"}[te_model] From e25c391888618967f7ef199f466ff29da2ed14b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Sepp=C3=A4nen?= <40791699+kijai@users.noreply.github.com> Date: Thu, 18 Jun 2026 00:22:36 +0300 Subject: [PATCH 47/52] feat: Support Boogu-Image (CORE-308) (#14523) --- comfy/ldm/boogu/model.py | 321 ++++++++++++++++++++++++++++++++++ comfy/ldm/omnigen/omnigen2.py | 2 +- comfy/model_base.py | 6 + comfy/model_detection.py | 10 ++ comfy/sd.py | 6 + comfy/supported_models.py | 23 +++ comfy/text_encoders/boogu.py | 58 ++++++ comfy_extras/nodes_boogu.py | 96 ++++++++++ nodes.py | 3 +- 9 files changed, 523 insertions(+), 2 deletions(-) create mode 100644 comfy/ldm/boogu/model.py create mode 100644 comfy/text_encoders/boogu.py create mode 100644 comfy_extras/nodes_boogu.py diff --git a/comfy/ldm/boogu/model.py b/comfy/ldm/boogu/model.py new file mode 100644 index 000000000..966f3c583 --- /dev/null +++ b/comfy/ldm/boogu/model.py @@ -0,0 +1,321 @@ +# Boogu-Image-0.1 transformer +# Architecture is an OmniGen2 derivative (see 
comfy/ldm/omnigen/omnigen2.py) with an +# added dual-stream ("double_stream") stage before the single-stream layers, conditioned +# by a Qwen3-VL multimodal LLM. Reuses the OmniGen2/Lumina building blocks and the Flux +# RoPE core, the only new component is the double-stream block + the hybrid forward order. + +from typing import Optional, Tuple + +import torch +import torch.nn as nn +from einops import rearrange + +import comfy.ldm.common_dit +import comfy.ldm.omnigen.omnigen2 +from comfy.ldm.modules.attention import optimized_attention_masked +from comfy.ldm.omnigen.omnigen2 import ( + OmniGen2RotaryPosEmbed, + Lumina2CombinedTimestepCaptionEmbedding, + LuminaRMSNormZero, + LuminaLayerNormContinuous, + LuminaFeedForward, + Attention, + OmniGen2TransformerBlock, + apply_rotary_emb, +) + +class BooguDoubleStreamProcessor(nn.Module): + # Joint attention over [instruct ; img] with separate per-stream q/k/v and output projections. + def __init__(self, dim, head_dim, heads, kv_heads, dtype=None, device=None, operations=None): + super().__init__() + query_dim = head_dim * heads + kv_dim = head_dim * kv_heads + + self.img_to_q = operations.Linear(query_dim, query_dim, bias=False, dtype=dtype, device=device) + self.img_to_k = operations.Linear(query_dim, kv_dim, bias=False, dtype=dtype, device=device) + self.img_to_v = operations.Linear(query_dim, kv_dim, bias=False, dtype=dtype, device=device) + + self.instruct_to_q = operations.Linear(query_dim, query_dim, bias=False, dtype=dtype, device=device) + self.instruct_to_k = operations.Linear(query_dim, kv_dim, bias=False, dtype=dtype, device=device) + self.instruct_to_v = operations.Linear(query_dim, kv_dim, bias=False, dtype=dtype, device=device) + + self.instruct_out = operations.Linear(query_dim, query_dim, bias=False, dtype=dtype, device=device) + self.img_out = operations.Linear(query_dim, query_dim, bias=False, dtype=dtype, device=device) + + def forward(self, attn, img_hidden_states, instruct_hidden_states, rotary_emb, 
attention_mask=None, transformer_options={}): + batch_size = img_hidden_states.shape[0] + L_instruct = instruct_hidden_states.shape[1] + + img_q = self.img_to_q(img_hidden_states) + img_k = self.img_to_k(img_hidden_states) + img_v = self.img_to_v(img_hidden_states) + + instruct_q = self.instruct_to_q(instruct_hidden_states) + instruct_k = self.instruct_to_k(instruct_hidden_states) + instruct_v = self.instruct_to_v(instruct_hidden_states) + + # Concatenate instruction first, then image (matches reference processor order). + query = torch.cat([instruct_q, img_q], dim=1) + key = torch.cat([instruct_k, img_k], dim=1) + value = torch.cat([instruct_v, img_v], dim=1) + + query = query.view(batch_size, -1, attn.heads, attn.dim_head) + key = key.view(batch_size, -1, attn.kv_heads, attn.dim_head) + value = value.view(batch_size, -1, attn.kv_heads, attn.dim_head) + + query = attn.norm_q(query) + key = attn.norm_k(key) + + if rotary_emb is not None: + query = apply_rotary_emb(query, rotary_emb) + key = apply_rotary_emb(key, rotary_emb) + + query = query.transpose(1, 2) + key = key.transpose(1, 2) + value = value.transpose(1, 2) + + if attn.kv_heads < attn.heads: + key = key.repeat_interleave(attn.heads // attn.kv_heads, dim=1) + value = value.repeat_interleave(attn.heads // attn.kv_heads, dim=1) + + hidden_states = optimized_attention_masked(query, key, value, attn.heads, attention_mask, skip_reshape=True, transformer_options=transformer_options) + + # Split back to instruction/image, apply per-stream output projections, recombine. 
+ instruct_hidden_states = self.instruct_out(hidden_states[:, :L_instruct]) + img_hidden_states = self.img_out(hidden_states[:, L_instruct:]) + hidden_states = torch.cat([instruct_hidden_states, img_hidden_states], dim=1) + + hidden_states = attn.to_out[0](hidden_states) + return hidden_states + + +class BooguJointAttention(nn.Module): + # Holds the shared q/k RMSNorm + final output projection + def __init__(self, dim, head_dim, heads, kv_heads, eps=1e-5, dtype=None, device=None, operations=None): + super().__init__() + self.heads = heads + self.kv_heads = kv_heads + self.dim_head = head_dim + self.scale = head_dim ** -0.5 + + self.norm_q = operations.RMSNorm(head_dim, eps=eps, dtype=dtype, device=device) + self.norm_k = operations.RMSNorm(head_dim, eps=eps, dtype=dtype, device=device) + self.to_out = nn.Sequential( + operations.Linear(heads * head_dim, dim, bias=False, dtype=dtype, device=device), + nn.Dropout(0.0), + ) + self.processor = BooguDoubleStreamProcessor(dim, head_dim, heads, kv_heads, dtype=dtype, device=device, operations=operations) + + def forward(self, img_hidden_states, instruct_hidden_states, rotary_emb, attention_mask=None, transformer_options={}): + return self.processor(self, img_hidden_states, instruct_hidden_states, rotary_emb, attention_mask, transformer_options=transformer_options) + + +class BooguDoubleStreamBlock(nn.Module): + # Dual-stream block: joint attention over [instruct ; img] + image self-attention, each stream with its own modulation/MLP. 
+ def __init__(self, dim, num_attention_heads, num_kv_heads, multiple_of, ffn_dim_multiplier, norm_eps, dtype=None, device=None, operations=None): + super().__init__() + head_dim = dim // num_attention_heads + + self.img_instruct_attn = BooguJointAttention(dim, head_dim, num_attention_heads, num_kv_heads, eps=1e-5, dtype=dtype, device=device, operations=operations) + self.img_self_attn = Attention( + query_dim=dim, dim_head=head_dim, heads=num_attention_heads, kv_heads=num_kv_heads, + eps=1e-5, bias=False, dtype=dtype, device=device, operations=operations, + ) + + self.img_feed_forward = LuminaFeedForward(dim=dim, inner_dim=4 * dim, multiple_of=multiple_of, dtype=dtype, device=device, operations=operations) + self.instruct_feed_forward = LuminaFeedForward(dim=dim, inner_dim=4 * dim, multiple_of=multiple_of, dtype=dtype, device=device, operations=operations) + + self.img_norm1 = LuminaRMSNormZero(embedding_dim=dim, norm_eps=norm_eps, dtype=dtype, device=device, operations=operations) + self.img_norm2 = LuminaRMSNormZero(embedding_dim=dim, norm_eps=norm_eps, dtype=dtype, device=device, operations=operations) + self.img_norm3 = LuminaRMSNormZero(embedding_dim=dim, norm_eps=norm_eps, dtype=dtype, device=device, operations=operations) + self.instruct_norm1 = LuminaRMSNormZero(embedding_dim=dim, norm_eps=norm_eps, dtype=dtype, device=device, operations=operations) + self.instruct_norm2 = LuminaRMSNormZero(embedding_dim=dim, norm_eps=norm_eps, dtype=dtype, device=device, operations=operations) + + self.img_attn_norm = operations.RMSNorm(dim, eps=norm_eps, dtype=dtype, device=device) + self.img_self_attn_norm = operations.RMSNorm(dim, eps=norm_eps, dtype=dtype, device=device) + self.img_ffn_norm1 = operations.RMSNorm(dim, eps=norm_eps, dtype=dtype, device=device) + self.img_ffn_norm2 = operations.RMSNorm(dim, eps=norm_eps, dtype=dtype, device=device) + + self.instruct_attn_norm = operations.RMSNorm(dim, eps=norm_eps, dtype=dtype, device=device) + self.instruct_ffn_norm1 = 
operations.RMSNorm(dim, eps=norm_eps, dtype=dtype, device=device) + self.instruct_ffn_norm2 = operations.RMSNorm(dim, eps=norm_eps, dtype=dtype, device=device) + + def forward(self, img_hidden_states, instruct_hidden_states, joint_rotary_emb, img_rotary_emb, temb, joint_attention_mask=None, img_attention_mask=None, transformer_options={}): + L_instruct = instruct_hidden_states.shape[1] + + img_norm1_out, img_gate_msa, img_scale_mlp, img_gate_mlp = self.img_norm1(img_hidden_states, temb) + img_norm2_out, img_shift_mlp, _, _ = self.img_norm2(img_hidden_states, temb) + img_norm3_out, img_gate_self, _, _ = self.img_norm3(img_hidden_states, temb) + + instruct_norm1_out, instruct_gate_msa, instruct_scale_mlp, instruct_gate_mlp = self.instruct_norm1(instruct_hidden_states, temb) + instruct_norm2_out, instruct_shift_mlp, _, _ = self.instruct_norm2(instruct_hidden_states, temb) + + joint_attn_out = self.img_instruct_attn(img_norm1_out, instruct_norm1_out, joint_rotary_emb, joint_attention_mask, transformer_options=transformer_options) + instruct_attn_out = joint_attn_out[:, :L_instruct] + img_attn_out = joint_attn_out[:, L_instruct:] + + img_self_attn_out = self.img_self_attn(img_norm3_out, img_norm3_out, img_attention_mask, img_rotary_emb, transformer_options=transformer_options) + + img_hidden_states = img_hidden_states + img_gate_msa.unsqueeze(1).tanh() * self.img_attn_norm(img_attn_out) + img_hidden_states = img_hidden_states + img_gate_self.unsqueeze(1).tanh() * self.img_self_attn_norm(img_self_attn_out) + img_mlp_input = (1 + img_scale_mlp.unsqueeze(1)) * img_norm2_out + img_shift_mlp.unsqueeze(1) + img_mlp_out = self.img_feed_forward(self.img_ffn_norm1(img_mlp_input)) + img_hidden_states = img_hidden_states + img_gate_mlp.unsqueeze(1).tanh() * self.img_ffn_norm2(img_mlp_out) + + instruct_hidden_states = instruct_hidden_states + instruct_gate_msa.unsqueeze(1).tanh() * self.instruct_attn_norm(instruct_attn_out) + instruct_mlp_input = (1 + 
instruct_scale_mlp.unsqueeze(1)) * instruct_norm2_out + instruct_shift_mlp.unsqueeze(1) + instruct_mlp_out = self.instruct_feed_forward(self.instruct_ffn_norm1(instruct_mlp_input)) + instruct_hidden_states = instruct_hidden_states + instruct_gate_mlp.unsqueeze(1).tanh() * self.instruct_ffn_norm2(instruct_mlp_out) + + return img_hidden_states, instruct_hidden_states + + +class BooguTransformer2DModel(nn.Module): + def __init__( + self, + patch_size: int = 2, + in_channels: int = 16, + out_channels: Optional[int] = None, + hidden_size: int = 3360, + num_layers: int = 32, + num_double_stream_layers: int = 8, + num_refiner_layers: int = 2, + num_attention_heads: int = 28, + num_kv_heads: int = 7, + multiple_of: int = 256, + ffn_dim_multiplier: Optional[float] = None, + norm_eps: float = 1e-5, + axes_dim_rope: Tuple[int, int, int] = (40, 40, 40), + axes_lens: Tuple[int, int, int] = (2048, 1664, 1664), + instruction_feat_dim: int = 4096, + timestep_scale: float = 1000.0, + image_model=None, + device=None, dtype=None, operations=None, + ): + super().__init__() + + self.patch_size = patch_size + self.out_channels = out_channels or in_channels + self.hidden_size = hidden_size + self.dtype = dtype + + self.rope_embedder = OmniGen2RotaryPosEmbed( + theta=10000, + axes_dim=axes_dim_rope, + axes_lens=axes_lens, + patch_size=patch_size, + ) + + self.x_embedder = operations.Linear(patch_size * patch_size * in_channels, hidden_size, dtype=dtype, device=device) + self.ref_image_patch_embedder = operations.Linear(patch_size * patch_size * in_channels, hidden_size, dtype=dtype, device=device) + + self.time_caption_embed = Lumina2CombinedTimestepCaptionEmbedding( + hidden_size=hidden_size, + text_feat_dim=instruction_feat_dim, + norm_eps=norm_eps, + timestep_scale=timestep_scale, dtype=dtype, device=device, operations=operations + ) + + self.noise_refiner = nn.ModuleList([ + OmniGen2TransformerBlock(hidden_size, num_attention_heads, num_kv_heads, multiple_of, ffn_dim_multiplier, 
norm_eps, modulation=True, dtype=dtype, device=device, operations=operations) + for _ in range(num_refiner_layers) + ]) + + self.ref_image_refiner = nn.ModuleList([ + OmniGen2TransformerBlock(hidden_size, num_attention_heads, num_kv_heads, multiple_of, ffn_dim_multiplier, norm_eps, modulation=True, dtype=dtype, device=device, operations=operations) + for _ in range(num_refiner_layers) + ]) + + self.context_refiner = nn.ModuleList([ + OmniGen2TransformerBlock(hidden_size, num_attention_heads, num_kv_heads, multiple_of, ffn_dim_multiplier, norm_eps, modulation=False, dtype=dtype, device=device, operations=operations) + for _ in range(num_refiner_layers) + ]) + + self.double_stream_layers = nn.ModuleList([ + BooguDoubleStreamBlock(hidden_size, num_attention_heads, num_kv_heads, multiple_of, ffn_dim_multiplier, norm_eps, dtype=dtype, device=device, operations=operations) + for _ in range(num_double_stream_layers) + ]) + + self.single_stream_layers = nn.ModuleList([ + OmniGen2TransformerBlock(hidden_size, num_attention_heads, num_kv_heads, multiple_of, ffn_dim_multiplier, norm_eps, modulation=True, dtype=dtype, device=device, operations=operations) + for _ in range(num_layers) + ]) + + self.norm_out = LuminaLayerNormContinuous( + embedding_dim=hidden_size, + conditioning_embedding_dim=min(hidden_size, 1024), + elementwise_affine=False, + eps=1e-6, + out_dim=patch_size * patch_size * self.out_channels, dtype=dtype, device=device, operations=operations + ) + + self.image_index_embedding = nn.Parameter(torch.empty(5, hidden_size, device=device, dtype=dtype)) + + # Patchify/refine helpers are identical to OmniGen2; reuse via bound methods. 
+ flat_and_pad_to_seq = comfy.ldm.omnigen.omnigen2.OmniGen2Transformer2DModel.flat_and_pad_to_seq + img_patch_embed_and_refine = comfy.ldm.omnigen.omnigen2.OmniGen2Transformer2DModel.img_patch_embed_and_refine + + def forward(self, x, timesteps, context, num_tokens, ref_latents=None, attention_mask=None, transformer_options={}, **kwargs): + B, C, H, W = x.shape + hidden_states = comfy.ldm.common_dit.pad_to_patch_size(x, (self.patch_size, self.patch_size)) + _, _, H_padded, W_padded = hidden_states.shape + timestep = 1.0 - timesteps + text_hidden_states = context + text_attention_mask = attention_mask + ref_image_hidden_states = ref_latents + device = hidden_states.device + + temb, text_hidden_states = self.time_caption_embed(timestep, text_hidden_states, hidden_states[0].dtype) + + ( + hidden_states, ref_image_hidden_states, + img_mask, ref_img_mask, + l_effective_ref_img_len, l_effective_img_len, + ref_img_sizes, img_sizes, + ) = self.flat_and_pad_to_seq(hidden_states, ref_image_hidden_states) + + ( + context_rotary_emb, ref_img_rotary_emb, noise_rotary_emb, + rotary_emb, encoder_seq_lengths, seq_lengths, + ) = self.rope_embedder( + hidden_states.shape[0], text_hidden_states.shape[1], [num_tokens] * text_hidden_states.shape[0], + l_effective_ref_img_len, l_effective_img_len, + ref_img_sizes, img_sizes, device, + ) + + for layer in self.context_refiner: + text_hidden_states = layer(text_hidden_states, text_attention_mask, context_rotary_emb, transformer_options=transformer_options) + + img_len = hidden_states.shape[1] + combined_img_hidden_states = self.img_patch_embed_and_refine( + hidden_states, ref_image_hidden_states, + img_mask, ref_img_mask, + noise_rotary_emb, ref_img_rotary_emb, + l_effective_ref_img_len, l_effective_img_len, + temb, + transformer_options=transformer_options, + ) + + # Double-stream stage: the image self-attention only sees the [ref ; noise] tokens, + # which sit after the instruction tokens in the joint rope. 
+ L_instruct = text_hidden_states.shape[1] + combined_img_rotary_emb = rotary_emb[:, L_instruct:] + for layer in self.double_stream_layers: + combined_img_hidden_states, text_hidden_states = layer( + combined_img_hidden_states, text_hidden_states, + rotary_emb, combined_img_rotary_emb, temb, + joint_attention_mask=None, img_attention_mask=None, + transformer_options=transformer_options, + ) + + hidden_states = torch.cat([text_hidden_states, combined_img_hidden_states], dim=1) + + for layer in self.single_stream_layers: + hidden_states = layer(hidden_states, None, rotary_emb, temb, transformer_options=transformer_options) + + hidden_states = self.norm_out(hidden_states, temb) + + p = self.patch_size + output = rearrange(hidden_states[:, -img_len:], 'b (h w) (p1 p2 c) -> b c (h p1) (w p2)', h=H_padded // p, w=W_padded // p, p1=p, p2=p)[:, :, :H, :W] + + return -output diff --git a/comfy/ldm/omnigen/omnigen2.py b/comfy/ldm/omnigen/omnigen2.py index e9ca5229d..b8da4cf39 100644 --- a/comfy/ldm/omnigen/omnigen2.py +++ b/comfy/ldm/omnigen/omnigen2.py @@ -22,7 +22,7 @@ def apply_rotary_emb(x, freqs_cis): def swiglu(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: - return F.silu(x) * y + return F.silu(x, inplace=True).mul_(y) class TimestepEmbedding(nn.Module): diff --git a/comfy/model_base.py b/comfy/model_base.py index d143dc06f..f49da50ae 100644 --- a/comfy/model_base.py +++ b/comfy/model_base.py @@ -54,6 +54,7 @@ import comfy.ldm.pixeldit.model import comfy.ldm.pixeldit.pid import comfy.ldm.ace.model import comfy.ldm.omnigen.omnigen2 +import comfy.ldm.boogu.model import comfy.ldm.qwen_image.model import comfy.ldm.ideogram4.model import comfy.ldm.kandinsky5.model @@ -2103,6 +2104,11 @@ class Omnigen2(BaseModel): out['ref_latents'] = list([1, 16, sum(map(lambda a: math.prod(a.size()), ref_latents)) // 16]) return out +class Boogu(Omnigen2): + def __init__(self, model_config, model_type=ModelType.FLOW, device=None): + super(Omnigen2, self).__init__(model_config, 
model_type, device=device, unet_model=comfy.ldm.boogu.model.BooguTransformer2DModel) + self.memory_usage_factor_conds = ("ref_latents",) + class QwenImage(BaseModel): def __init__(self, model_config, model_type=ModelType.FLUX, device=None): super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.qwen_image.model.QwenImageTransformer2DModel) diff --git a/comfy/model_detection.py b/comfy/model_detection.py index 7d0cab308..b773f0393 100644 --- a/comfy/model_detection.py +++ b/comfy/model_detection.py @@ -761,6 +761,16 @@ def detect_unet_config(state_dict, key_prefix, metadata=None): return dit_config + if '{}double_stream_layers.0.img_instruct_attn.processor.img_to_q.weight'.format(key_prefix) in state_dict_keys: # Boogu-Image (OmniGen2 derivative + dual-stream stage) + dit_config = {} + dit_config["image_model"] = "boogu" + dit_config["hidden_size"] = state_dict['{}x_embedder.weight'.format(key_prefix)].shape[0] + dit_config["num_layers"] = count_blocks(state_dict_keys, '{}single_stream_layers.'.format(key_prefix) + '{}.') + dit_config["num_double_stream_layers"] = count_blocks(state_dict_keys, '{}double_stream_layers.'.format(key_prefix) + '{}.') + dit_config["num_refiner_layers"] = count_blocks(state_dict_keys, '{}noise_refiner.'.format(key_prefix) + '{}.') + dit_config["instruction_feat_dim"] = state_dict['{}time_caption_embed.caption_embedder.0.weight'.format(key_prefix)].shape[0] + return dit_config + if '{}time_caption_embed.timestep_embedder.linear_1.bias'.format(key_prefix) in state_dict_keys: # Omnigen2 dit_config = {} dit_config["image_model"] = "omnigen2" diff --git a/comfy/sd.py b/comfy/sd.py index 348fe4958..d9b1c0553 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -68,6 +68,7 @@ import comfy.text_encoders.ace15 import comfy.text_encoders.longcat_image import comfy.text_encoders.qwen35 import comfy.text_encoders.qwen3vl +import comfy.text_encoders.boogu import comfy.text_encoders.ernie import comfy.text_encoders.gemma4 import 
comfy.text_encoders.cogvideo @@ -1301,6 +1302,7 @@ class CLIPType(Enum): LENS = 28 PIXELDIT = 29 IDEOGRAM4 = 30 + BOOGU = 31 @@ -1622,6 +1624,10 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip clip_data[0] = comfy.utils.state_dict_prefix_replace(clip_data[0], {"model.language_model.": "model.", "model.visual.": "visual.", "lm_head.": "model.lm_head."}) clip_target.clip = comfy.text_encoders.ideogram4.te_qwen3vl(**llama_detect(clip_data)) clip_target.tokenizer = comfy.text_encoders.ideogram4.Ideogram4Qwen3VLTokenizer + elif clip_type == CLIPType.BOOGU and te_model == TEModel.QWEN3VL_8B: # Boogu-Image: full Qwen3-VL-8B, last hidden state, no-think template. + clip_data[0] = comfy.utils.state_dict_prefix_replace(clip_data[0], {"model.language_model.": "model.", "model.visual.": "visual.", "lm_head.": "model.lm_head."}) + clip_target.clip = comfy.text_encoders.boogu.te(**llama_detect(clip_data)) + clip_target.tokenizer = comfy.text_encoders.boogu.BooguTokenizer elif clip_type in (CLIPType.FLUX, CLIPType.FLUX2): # Flux2 Klein reuses the Qwen3-VL LM (3-layer tap -> 12288); visual unused. 
klein_model_type = "qwen3_8b" if te_model == TEModel.QWEN3VL_8B else "qwen3_4b" clip_target.clip = comfy.text_encoders.flux.klein_te(**llama_detect(clip_data), model_type=klein_model_type) diff --git a/comfy/supported_models.py b/comfy/supported_models.py index 3be935577..d78b6ae87 100644 --- a/comfy/supported_models.py +++ b/comfy/supported_models.py @@ -25,6 +25,7 @@ import comfy.text_encoders.hunyuan_image import comfy.text_encoders.kandinsky5 import comfy.text_encoders.z_image import comfy.text_encoders.ideogram4 +import comfy.text_encoders.boogu import comfy.text_encoders.anima import comfy.text_encoders.ace15 import comfy.text_encoders.longcat_image @@ -1758,6 +1759,27 @@ class Omnigen2(supported_models_base.BASE): hunyuan_detect = comfy.text_encoders.hunyuan_video.llama_detect(state_dict, "{}qwen25_3b.transformer.".format(pref)) return supported_models_base.ClipTarget(comfy.text_encoders.omnigen2.Omnigen2Tokenizer, comfy.text_encoders.omnigen2.te(**hunyuan_detect)) +class Boogu(Omnigen2): + unet_config = { + "image_model": "boogu", + } + + sampling_settings = { + "multiplier": 1.0, + "shift": 3.16, + } + + memory_usage_factor = 1.95 #TODO + + def get_model(self, state_dict, prefix="", device=None): + out = model_base.Boogu(self, device=device) + return out + + def clip_target(self, state_dict={}): + pref = self.text_encoder_key_prefix[0] + hunyuan_detect = comfy.text_encoders.hunyuan_video.llama_detect(state_dict, "{}qwen3vl_8b.transformer.".format(pref)) + return supported_models_base.ClipTarget(comfy.text_encoders.boogu.BooguTokenizer, comfy.text_encoders.boogu.te(**hunyuan_detect)) + class Ideogram4(supported_models_base.BASE): unet_config = { "image_model": "ideogram4", @@ -2300,6 +2322,7 @@ models = [ ACEStep, ACEStep15, Omnigen2, + Boogu, QwenImage, Ideogram4, Flux2, diff --git a/comfy/text_encoders/boogu.py b/comfy/text_encoders/boogu.py new file mode 100644 index 000000000..d9de92f10 --- /dev/null +++ b/comfy/text_encoders/boogu.py @@ -0,0 +1,58 @@ 
+"""Boogu-Image text encoder: full Qwen3-VL-8B, last hidden state (4096-dim). + +Boogu uses the final hidden state of Qwen3-VL as the per-token instruction feature +(num_instruction_feature_layers=1, reduce_type=mean -> just the last layer). +The model itself is the standard Qwen3-VL TE, only the chat template differs +(a fixed system prompt and no think block). +""" + +import comfy.text_encoders.qwen3vl +from comfy import sd1_clip + + +# System prompts from the reference pipeline (pipeline_boogu.py). +# T2I (non-empty instruction, no image) uses the helpful-assistant prompt; +# everything else (the CFG negative / "drop" condition, and any image case) uses the TI2I "describe" prompt. +BOOGU_T2I_SYSTEM = "You are a helpful assistant that generates high-quality images based on user instructions. The instructions are as follows." +BOOGU_DROP_SYSTEM = "Describe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate." + + +class BooguTokenizer(comfy.text_encoders.qwen3vl.Qwen3VLTokenizer): + def __init__(self, embedding_directory=None, tokenizer_data={}): + super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, model_type="qwen3vl_8b") + # apply_chat_template without add_generation_prompt + self.llama_template = "<|im_start|>system\n" + BOOGU_T2I_SYSTEM + "<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n" + self.llama_template_images = "<|im_start|>system\n" + BOOGU_DROP_SYSTEM + "<|im_end|>\n<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>{}<|im_end|>\n" + # Reference SYSTEM_PROMPT_DROP: used for the empty negative/uncond instruction. 
+ self.llama_template_drop = "<|im_start|>system\n" + BOOGU_DROP_SYSTEM + "<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n" + + def tokenize_with_weights(self, text, return_word_ids=False, llama_template=None, images=[], prevent_empty_text=False, thinking=True, **kwargs): + if llama_template is None and len(images) == 0 and text.strip() == "": + llama_template = self.llama_template_drop + # Boogu conditions on the no-think template; thinking=True drops the empty think block qwen3vl adds by default. + return super().tokenize_with_weights(text, return_word_ids=return_word_ids, llama_template=llama_template, images=images, prevent_empty_text=prevent_empty_text, thinking=thinking, **kwargs) + + +class BooguQwen3VLClipModel(comfy.text_encoders.qwen3vl.Qwen3VLClipModel): + def __init__(self, device="cpu", dtype=None, attention_mask=True, model_options={}, model_type="qwen3vl_8b"): + super().__init__(device=device, dtype=dtype, attention_mask=attention_mask, model_options=model_options, model_type=model_type) + # apply the final RMSNorm to the tapped last layer + self.layer_norm_hidden_state = True + + +class BooguTEModel(sd1_clip.SD1ClipModel): + def __init__(self, device="cpu", dtype=None, model_options={}): + clip_model = lambda **kw: BooguQwen3VLClipModel(**kw, model_type="qwen3vl_8b") + super().__init__(device=device, dtype=dtype, name="qwen3vl_8b", clip_model=clip_model, model_options=model_options) + + +def te(dtype_llama=None, llama_quantization_metadata=None): + class BooguTEModel_(BooguTEModel): + def __init__(self, device="cpu", dtype=None, model_options={}): + if dtype_llama is not None: + dtype = dtype_llama + if llama_quantization_metadata is not None: + model_options = model_options.copy() + model_options["quantization_metadata"] = llama_quantization_metadata + super().__init__(device=device, dtype=dtype, model_options=model_options) + return BooguTEModel_ diff --git a/comfy_extras/nodes_boogu.py b/comfy_extras/nodes_boogu.py new file mode 100644 index 
000000000..590623e12 --- /dev/null +++ b/comfy_extras/nodes_boogu.py @@ -0,0 +1,96 @@ +import math + +import node_helpers +import comfy.utils +from typing_extensions import override +from comfy_api.latest import ComfyExtension, io + + +class TextEncodeBooguEdit(io.ComfyNode): + """Boogu-Image Edit conditioning. + + The edit image is used twice, matching the reference pipeline: + - Qwen3-VL vision tokens (instruction understanding) -> positive only + - VAE reference latent (image identity) -> positive and negative + The ref latent is in both conds so it cancels under CFG (identity preserved); + the vision tokens are only in the positive so CFG amplifies the instruction. + The tokenizer selects the right system prompt automatically (image -> TI2I, + empty negative -> DROP), so no template plumbing is needed here. + """ + + @classmethod + def define_schema(cls): + return io.Schema( + node_id="TextEncodeBooguEdit", + category="model/conditioning/boogu", + inputs=[ + io.Clip.Input("clip"), + io.String.Input("prompt", multiline=True, dynamic_prompts=True), + io.Vae.Input("vae"), + io.Autogrow.Input( + "images", + template=io.Autogrow.TemplateNames( + io.Image.Input("image"), + names=[f"image_{i}" for i in range(1, 17)], + min=1, + ), + tooltip="Reference image(s) to edit. Boogu focuses on one reference per sample; more are allowed.", + ), + ], + outputs=[ + io.Conditioning.Output(display_name="positive"), + io.Conditioning.Output(display_name="negative"), + ], + ) + + @classmethod + def execute(cls, clip, prompt, vae=None, images: io.Autogrow.Type = None) -> io.NodeOutput: + ref_latents = [] + images_vl = [] + + images = images or {} + for name in sorted(images, key=lambda n: int(n.rsplit("_", 1)[-1])): + image = images[name] + if image is None: + continue + samples = image.movedim(-1, 1) + + # Vision tower input: the reference caps the VLM image at 384x384 + # (max_vlm_input_pil_pixels in pipeline_boogu.py). 
+ total = int(384 * 384) + scale_by = math.sqrt(total / (samples.shape[3] * samples.shape[2])) + width = round(samples.shape[3] * scale_by) + height = round(samples.shape[2] * scale_by) + s = comfy.utils.common_upscale(samples, width, height, "area", "disabled") + images_vl.append(s.movedim(1, -1)[:, :, :, :3]) + + # Reference latent: align to 16 px (VAE /8 * patch_size 2). + if vae is not None: + total = int(1024 * 1024) + scale_by = math.sqrt(total / (samples.shape[3] * samples.shape[2])) + width = round(samples.shape[3] * scale_by / 16.0) * 16 + height = round(samples.shape[2] * scale_by / 16.0) * 16 + s = comfy.utils.common_upscale(samples, width, height, "area", "disabled") + ref_latents.append(vae.encode(s.movedim(1, -1)[:, :, :, :3])) + + # positive: instruction + vision tokens; negative: empty (no vision). Ref latent on both. + positive = clip.encode_from_tokens_scheduled(clip.tokenize(prompt, images=images_vl)) + negative = clip.encode_from_tokens_scheduled(clip.tokenize("")) + + if len(ref_latents) > 0: + positive = node_helpers.conditioning_set_values(positive, {"reference_latents": ref_latents}, append=True) + negative = node_helpers.conditioning_set_values(negative, {"reference_latents": ref_latents}, append=True) + + return io.NodeOutput(positive, negative) + + +class BooguExtension(ComfyExtension): + @override + async def get_node_list(self) -> list[type[io.ComfyNode]]: + return [ + TextEncodeBooguEdit, + ] + + +async def comfy_entrypoint() -> BooguExtension: + return BooguExtension() diff --git a/nodes.py b/nodes.py index bb4649478..0b3fdab63 100644 --- a/nodes.py +++ b/nodes.py @@ -969,7 +969,7 @@ class CLIPLoader: @classmethod def INPUT_TYPES(s): return {"required": { "clip_name": (folder_paths.get_filename_list("text_encoders"), ), - "type": (["stable_diffusion", "stable_cascade", "sd3", "stable_audio", "mochi", "ltxv", "pixart", "cosmos", "lumina2", "wan", "hidream", "chroma", "ace", "omnigen2", "qwen_image", "hunyuan_image", "flux2", "ovis", 
"longcat_image", "cogvideox", "lens", "pixeldit", "ideogram4"], ), + "type": (["stable_diffusion", "stable_cascade", "sd3", "stable_audio", "mochi", "ltxv", "pixart", "cosmos", "lumina2", "wan", "hidream", "chroma", "ace", "omnigen2", "qwen_image", "hunyuan_image", "flux2", "ovis", "longcat_image", "cogvideox", "lens", "pixeldit", "ideogram4", "boogu"], ), }, "optional": { "device": (["default", "cpu"], {"advanced": True}), @@ -2425,6 +2425,7 @@ async def init_builtin_extra_nodes(): "nodes_tcfg.py", "nodes_context_windows.py", "nodes_qwen.py", + "nodes_boogu.py", "nodes_chroma_radiance.py", "nodes_pid.py", "nodes_model_patch.py", From 52257bb435162ff345fcc8cfde99c52c8923589d Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Wed, 17 Jun 2026 15:42:29 -0700 Subject: [PATCH 48/52] Add negative prompt to boogu edit node and set min images to 0. (#14529) --- comfy/supported_models.py | 2 +- comfy_extras/nodes_boogu.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/comfy/supported_models.py b/comfy/supported_models.py index d78b6ae87..cc05908ee 100644 --- a/comfy/supported_models.py +++ b/comfy/supported_models.py @@ -1769,7 +1769,7 @@ class Boogu(Omnigen2): "shift": 3.16, } - memory_usage_factor = 1.95 #TODO + memory_usage_factor = 2.15 def get_model(self, state_dict, prefix="", device=None): out = model_base.Boogu(self, device=device) diff --git a/comfy_extras/nodes_boogu.py b/comfy_extras/nodes_boogu.py index 590623e12..f3951c290 100644 --- a/comfy_extras/nodes_boogu.py +++ b/comfy_extras/nodes_boogu.py @@ -26,13 +26,14 @@ class TextEncodeBooguEdit(io.ComfyNode): inputs=[ io.Clip.Input("clip"), io.String.Input("prompt", multiline=True, dynamic_prompts=True), + io.String.Input("negative_prompt", multiline=True, dynamic_prompts=True, advanced=True), io.Vae.Input("vae"), io.Autogrow.Input( "images", template=io.Autogrow.TemplateNames( io.Image.Input("image"), names=[f"image_{i}" for i in 
range(1, 17)], - min=1, + min=0, ), tooltip="Reference image(s) to edit. Boogu focuses on one reference per sample; more are allowed.", ), @@ -44,7 +45,7 @@ class TextEncodeBooguEdit(io.ComfyNode): ) @classmethod - def execute(cls, clip, prompt, vae=None, images: io.Autogrow.Type = None) -> io.NodeOutput: + def execute(cls, clip, prompt, negative_prompt, vae=None, images: io.Autogrow.Type = None) -> io.NodeOutput: ref_latents = [] images_vl = [] @@ -75,7 +76,7 @@ class TextEncodeBooguEdit(io.ComfyNode): # positive: instruction + vision tokens; negative: empty (no vision). Ref latent on both. positive = clip.encode_from_tokens_scheduled(clip.tokenize(prompt, images=images_vl)) - negative = clip.encode_from_tokens_scheduled(clip.tokenize("")) + negative = clip.encode_from_tokens_scheduled(clip.tokenize(negative_prompt)) if len(ref_latents) > 0: positive = node_helpers.conditioning_set_values(positive, {"reference_latents": ref_latents}, append=True) From 191a75a2cdc021236fb7cbd4631511246b79a8c6 Mon Sep 17 00:00:00 2001 From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com> Date: Thu, 18 Jun 2026 02:54:53 +0300 Subject: [PATCH 49/52] [Partner Nodes] feat(Kling): add support for Kling V3-Turbo model (#14528) --- comfy_api_nodes/apis/kling.py | 56 ++++++++++++++++ comfy_api_nodes/nodes_kling.py | 116 ++++++++++++++++++++++++++++++--- 2 files changed, 163 insertions(+), 9 deletions(-) diff --git a/comfy_api_nodes/apis/kling.py b/comfy_api_nodes/apis/kling.py index fe0f97cb3..2c98c23b7 100644 --- a/comfy_api_nodes/apis/kling.py +++ b/comfy_api_nodes/apis/kling.py @@ -149,3 +149,59 @@ class MotionControlRequest(BaseModel): character_orientation: str = Field(...) mode: str = Field(..., description="'pro' or 'std'") model_name: str = Field(...) 
+ + +class Kling3TurboSettings(BaseModel): + resolution: str = Field("720p", description="'720p' or '1080p'") + aspect_ratio: str | None = Field(None, description="'16:9'/'9:16'/'1:1'; text-to-video only") + duration: int = Field(5, description="3-15 second") + + +class Kling3TurboText2VideoRequest(BaseModel): + prompt: str = Field(..., description="<=3072 chars; may use multi-shot 'shot n, m, words; ...'") + settings: Kling3TurboSettings | None = Field(None) + + +class Kling3TurboContent(BaseModel): + type: str = Field(..., description="'prompt' or 'first_frame'") + text: str | None = Field(None, description="for type=prompt; <=2500 chars") + url: str | None = Field(None, description="for type=first_frame") + + +class Kling3TurboImage2VideoRequest(BaseModel): + contents: list[Kling3TurboContent] = Field(..., description="prompt + first_frame materials") + settings: Kling3TurboSettings | None = Field(None) + + +class Kling3TurboCreateData(BaseModel): + id: str | None = Field(None, description="Task ID") + status: str | None = Field(None) + message: str | None = Field(None) + + +class Kling3TurboCreateResponse(BaseModel): + code: int | None = Field(None) + message: str | None = Field(None) + request_id: str | None = Field(None) + data: Kling3TurboCreateData | None = Field(None) + + +class Kling3TurboOutput(BaseModel): + type: str | None = Field(None, description="'video', 'image', 'audio', ...") + id: str | None = Field(None) + url: str | None = Field(None) + duration: str | None = Field(None) + + +class Kling3TurboTaskData(BaseModel): + id: str | None = Field(None) + status: str | None = Field(None, description="submitted | processing | succeeded | failed") + message: str | None = Field(None) + outputs: list[Kling3TurboOutput] | None = Field(None) + + +class Kling3TurboQueryResponse(BaseModel): + code: int | None = Field(None) + message: str | None = Field(None) + request_id: str | None = Field(None) + data: list[Kling3TurboTaskData] | None = Field(None) diff --git 
a/comfy_api_nodes/nodes_kling.py b/comfy_api_nodes/nodes_kling.py index c81d3503d..b27de2549 100644 --- a/comfy_api_nodes/nodes_kling.py +++ b/comfy_api_nodes/nodes_kling.py @@ -60,6 +60,12 @@ from comfy_api_nodes.apis.kling import ( OmniProImageRequest, OmniProReferences2VideoRequest, OmniProText2VideoRequest, + Kling3TurboSettings, + Kling3TurboText2VideoRequest, + Kling3TurboContent, + Kling3TurboImage2VideoRequest, + Kling3TurboCreateResponse, + Kling3TurboQueryResponse, TaskStatusResponse, TextToVideoWithAudioRequest, ) @@ -2847,6 +2853,67 @@ class MotionControl(IO.ComfyNode): return IO.NodeOutput(await download_url_to_video_output(final_response.data.task_result.videos[0].url)) +def build_turbo_shot_prompt(multi_prompt: list[MultiPromptEntry]) -> str: + """Render storyboard entries into the Turbo multi-shot prompt 'shot n, m, words; ...'.""" + return "; ".join(f"shot {i}, {int(e.duration)}, {e.prompt}" for i, e in enumerate(multi_prompt, 1)) + ";" + + +def _turbo_video_url(response: Kling3TurboQueryResponse) -> str: + """Extract the result video URL from a /tasks response (data[].outputs[] where type == 'video').""" + task = response.data[0] if response.data else None + if task and task.outputs: + for output in task.outputs: + if output.type == "video" and output.url: + return output.url + raise RuntimeError(f"Kling 3.0 Turbo task finished without a video output: {response.model_dump()}") + + +async def execute_kling_turbo( + cls: type[IO.ComfyNode], + *, + prompt: str, + resolution: str, + aspect_ratio: str, + duration: int, + start_frame: torch.Tensor | None, +) -> IO.NodeOutput: + """Create + poll a Kling 3.0 Turbo task. 
Image-to-video when start_frame is given, else text-to-video.""" + if start_frame is not None: + validate_image_dimensions(start_frame, min_width=300, min_height=300) + validate_image_aspect_ratio(start_frame, (1, 2.5), (2.5, 1)) + contents = [Kling3TurboContent(type="first_frame", url=tensor_to_base64_string(start_frame))] + if prompt: + contents.insert(0, Kling3TurboContent(type="prompt", text=prompt)) + create = await sync_op( + cls, + ApiEndpoint(path="/proxy/kling/image-to-video/kling-3.0-turbo", method="POST"), + response_model=Kling3TurboCreateResponse, + data=Kling3TurboImage2VideoRequest( + contents=contents, + settings=Kling3TurboSettings(resolution=resolution, duration=duration), # i2v: no aspect_ratio + ), + ) + else: + create = await sync_op( + cls, + ApiEndpoint(path="/proxy/kling/text-to-video/kling-3.0-turbo", method="POST"), + response_model=Kling3TurboCreateResponse, + data=Kling3TurboText2VideoRequest( + prompt=prompt, + settings=Kling3TurboSettings(resolution=resolution, aspect_ratio=aspect_ratio, duration=duration), + ), + ) + if not (create.data and create.data.id): + raise RuntimeError(f"Kling 3.0 Turbo create failed. 
Code: {create.code}, Message: {create.message}") + final_response = await poll_op( + cls, + ApiEndpoint(path="/proxy/kling/tasks", query_params={"task_ids": create.data.id}), + response_model=Kling3TurboQueryResponse, + status_extractor=lambda r: (r.data[0].status if r.data else None), + ) + return IO.NodeOutput(await download_url_to_video_output(_turbo_video_url(final_response))) + + class KlingVideoNode(IO.ComfyNode): @classmethod @@ -2884,7 +2951,11 @@ class KlingVideoNode(IO.ComfyNode): ], tooltip="Generate a series of video segments with individual prompts and durations.", ), - IO.Boolean.Input("generate_audio", default=True), + IO.Boolean.Input( + "generate_audio", + default=True, + tooltip="'kling-3.0-turbo' always generates native audio, so the audio toggle is ignored.", + ), IO.DynamicCombo.Input( "model", options=[ @@ -2899,6 +2970,17 @@ class KlingVideoNode(IO.ComfyNode): ), ], ), + IO.DynamicCombo.Option( + "kling-3.0-turbo", + [ + IO.Combo.Input("resolution", options=["1080p", "720p"], default="720p"), + IO.Combo.Input( + "aspect_ratio", + options=["16:9", "9:16", "1:1"], + tooltip="Ignored in image-to-video mode.", + ), + ], + ), ], tooltip="Model and generation settings.", ), @@ -2930,6 +3012,7 @@ class KlingVideoNode(IO.ComfyNode): price_badge=IO.PriceBadge( depends_on=IO.PriceBadgeDepends( widgets=[ + "model", "model.resolution", "generate_audio", "multi_shot", @@ -2944,14 +3027,7 @@ class KlingVideoNode(IO.ComfyNode): ), expr=""" ( - $rates := { - "4k": {"off": 0.42, "on": 0.42}, - "1080p": {"off": 0.112, "on": 0.168}, - "720p": {"off": 0.084, "on": 0.126} - }; $res := $lookup(widgets, "model.resolution"); - $audio := widgets.generate_audio ? "on" : "off"; - $rate := $lookup($lookup($rates, $res), $audio); $ms := widgets.multi_shot; $isSb := $ms != "disabled"; $n := $isSb ? $number($substring($ms, 0, 1)) : 0; @@ -2962,7 +3038,18 @@ class KlingVideoNode(IO.ComfyNode): $d5 := $n >= 5 ? 
$lookup(widgets, "multi_shot.storyboard_5_duration") : 0; $d6 := $n >= 6 ? $lookup(widgets, "multi_shot.storyboard_6_duration") : 0; $dur := $isSb ? $d1 + $d2 + $d3 + $d4 + $d5 + $d6 : $lookup(widgets, "multi_shot.duration"); - {"type":"usd","usd": $rate * $dur} + widgets.model = "kling-3.0-turbo" + ? {"type":"usd","usd": ($res = "1080p" ? 0.14 : 0.112) * $dur} + : ( + $rates := { + "4k": {"off": 0.42, "on": 0.42}, + "1080p": {"off": 0.112, "on": 0.168}, + "720p": {"off": 0.084, "on": 0.126} + }; + $audio := widgets.generate_audio ? "on" : "off"; + $rate := $lookup($lookup($rates, $res), $audio); + {"type":"usd","usd": $rate * $dur} + ) ) """, ), @@ -3015,6 +3102,17 @@ class KlingVideoNode(IO.ComfyNode): duration = multi_shot["duration"] validate_string(multi_shot["prompt"], min_length=1, max_length=2500) + if model["model"] == "kling-3.0-turbo": + turbo_prompt = build_turbo_shot_prompt(multi_prompt_list) if custom_multi_shot else multi_shot["prompt"] + return await execute_kling_turbo( + cls, + prompt=turbo_prompt, + resolution=model["resolution"], + aspect_ratio=model["aspect_ratio"], + duration=duration, + start_frame=start_frame, + ) + if start_frame is not None: validate_image_dimensions(start_frame, min_width=300, min_height=300) validate_image_aspect_ratio(start_frame, (1, 2.5), (2.5, 1)) From f2270f070a372898ca748299a51dcba5bba0ba53 Mon Sep 17 00:00:00 2001 From: Jedrzej Kosinski Date: Wed, 17 Jun 2026 19:35:05 -0700 Subject: [PATCH 50/52] feat: add enable_telemetry CLI feature flag (#14530) --- comfy_api/feature_flags.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/comfy_api/feature_flags.py b/comfy_api/feature_flags.py index adb5a3144..0f30608a9 100644 --- a/comfy_api/feature_flags.py +++ b/comfy_api/feature_flags.py @@ -25,6 +25,11 @@ CLI_FEATURE_FLAG_REGISTRY: dict[str, FeatureFlagInfo] = { "default": False, "description": "Show the sign-in button in the frontend even when not signed in", }, + "enable_telemetry": { + "type": "bool", + 
"default": False, + "description": "Signal the frontend that telemetry collection is enabled", + }, } From 8483c215dc4b3958a75a4b094755446e4de63e23 Mon Sep 17 00:00:00 2001 From: "Daxiong (Lin)" Date: Thu, 18 Jun 2026 17:24:05 +0800 Subject: [PATCH 51/52] Update ComfyUI Desktop to Comfy Desktop for consistent product naming (#14533) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c75353d36..bcec86377 100644 --- a/README.md +++ b/README.md @@ -140,7 +140,7 @@ ComfyUI follows a weekly release cycle targeting Monday but this regularly chang - Commits outside of the stable release tags may be very unstable and break many custom nodes. - Serves as the foundation for the desktop release -2. **[ComfyUI Desktop](https://github.com/Comfy-Org/Comfy-Desktop)** +2. **[Comfy Desktop](https://github.com/Comfy-Org/Comfy-Desktop)** - Builds a new release using the latest stable core version 3. **[ComfyUI Frontend](https://github.com/Comfy-Org/ComfyUI_frontend)** From 16514da2e75fe73e592675ef1740eb3c07e4f80c Mon Sep 17 00:00:00 2001 From: Comfy Org PR Bot Date: Thu, 18 Jun 2026 18:27:53 +0900 Subject: [PATCH 52/52] chore(openapi): sync shared API contract from cloud@d10ff72 (#14518) --- openapi.yaml | 195 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 187 insertions(+), 8 deletions(-) diff --git a/openapi.yaml b/openapi.yaml index 82ff5b003..2446e64e4 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -673,6 +673,35 @@ components: - created_at - updated_at type: object + JobsCancelRequest: + additionalProperties: false + description: Request to cancel multiple jobs by ID. + properties: + job_ids: + description: Job identifiers (UUIDs) to cancel. + items: + format: uuid + type: string + maxItems: 100 + minItems: 1 + type: array + required: + - job_ids + type: object + JobsCancelResponse: + description: Response for POST /api/jobs/cancel. 
+ properties: + cancelled: + description: | + Job IDs for which a cancel event was successfully dispatched by this + call. Jobs already in a terminal or cancelling state are idempotently + skipped and will not appear here. + items: + type: string + type: array + required: + - cancelled + type: object JobsListResponse: description: Paginated list of jobs for the authenticated user. properties: @@ -1006,7 +1035,7 @@ components: description: If true, clear all pending jobs from the queue type: boolean delete: - description: Array of PENDING job IDs to cancel + description: Array of job IDs to cancel; pending and running jobs transition to cancelled items: type: string type: array @@ -1822,6 +1851,83 @@ paths: summary: Update asset metadata tags: - file + /api/assets/{id}/content: + get: + description: | + Returns the binary content of an asset by ID. + + The contract is the same across runtimes — "GET this path and you + receive the asset's bytes" — but the mechanism differs: + - **Local ComfyUI** streams the bytes directly (`200`, + `application/octet-stream`). + - **Cloud** does not proxy large files; it responds `302` with a + `Location` redirect to a short-lived signed storage URL. Clients that + follow redirects (browsers, `fetch`/XHR, ``/`